Wed, 17 Jun 2015 17:48:25 -0700
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
Reviewed-by: kvn, jrose, phh
1.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Jul 31 14:28:51 2019 -0400 1.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Wed Jun 17 17:48:25 2015 -0700 1.3 @@ -194,6 +194,11 @@ 1.4 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1.5 } 1.6 1.7 + if (UseGHASHIntrinsics) { 1.8 + warning("GHASH intrinsics are not available on this CPU"); 1.9 + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1.10 + } 1.11 + 1.12 if (has_vshasig()) { 1.13 if (FLAG_IS_DEFAULT(UseSHA)) { 1.14 UseSHA = true;
2.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp Wed Jul 31 14:28:51 2019 -0400 2.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Wed Jun 17 17:48:25 2015 -0700 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 2.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -129,6 +129,7 @@ 2.11 flog3_op3 = 0x36, 2.12 edge_op3 = 0x36, 2.13 fsrc_op3 = 0x36, 2.14 + xmulx_op3 = 0x36, 2.15 impdep2_op3 = 0x37, 2.16 stpartialf_op3 = 0x37, 2.17 jmpl_op3 = 0x38, 2.18 @@ -220,6 +221,8 @@ 2.19 mdtox_opf = 0x110, 2.20 mstouw_opf = 0x111, 2.21 mstosw_opf = 0x113, 2.22 + xmulx_opf = 0x115, 2.23 + xmulxhi_opf = 0x116, 2.24 mxtod_opf = 0x118, 2.25 mwtos_opf = 0x119, 2.26 2.27 @@ -1212,6 +1215,9 @@ 2.28 void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); } 2.29 void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); } 2.30 2.31 + void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); } 2.32 + void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); } 2.33 + 2.34 // Crypto SHA instructions 2.35 2.36 void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
3.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jul 31 14:28:51 2019 -0400 3.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jun 17 17:48:25 2015 -0700 3.3 @@ -4788,6 +4788,130 @@ 3.4 return start; 3.5 } 3.6 3.7 + /* Single and multi-block ghash operations */ 3.8 + address generate_ghash_processBlocks() { 3.9 + __ align(CodeEntryAlignment); 3.10 + Label L_ghash_loop, L_aligned, L_main; 3.11 + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); 3.12 + address start = __ pc(); 3.13 + 3.14 + Register state = I0; 3.15 + Register subkeyH = I1; 3.16 + Register data = I2; 3.17 + Register len = I3; 3.18 + 3.19 + __ save_frame(0); 3.20 + 3.21 + __ ldx(state, 0, O0); 3.22 + __ ldx(state, 8, O1); 3.23 + 3.24 + // Loop label for multiblock operations 3.25 + __ BIND(L_ghash_loop); 3.26 + 3.27 + // Check if 'data' is unaligned 3.28 + __ andcc(data, 7, G1); 3.29 + __ br(Assembler::zero, false, Assembler::pt, L_aligned); 3.30 + __ delayed()->nop(); 3.31 + 3.32 + Register left_shift = L1; 3.33 + Register right_shift = L2; 3.34 + Register data_ptr = L3; 3.35 + 3.36 + // Get left and right shift values in bits 3.37 + __ sll(G1, LogBitsPerByte, left_shift); 3.38 + __ mov(64, right_shift); 3.39 + __ sub(right_shift, left_shift, right_shift); 3.40 + 3.41 + // Align to read 'data' 3.42 + __ sub(data, G1, data_ptr); 3.43 + 3.44 + // Load first 8 bytes of 'data' 3.45 + __ ldx(data_ptr, 0, O4); 3.46 + __ sllx(O4, left_shift, O4); 3.47 + __ ldx(data_ptr, 8, O5); 3.48 + __ srlx(O5, right_shift, G4); 3.49 + __ bset(G4, O4); 3.50 + 3.51 + // Load second 8 bytes of 'data' 3.52 + __ sllx(O5, left_shift, O5); 3.53 + __ ldx(data_ptr, 16, G4); 3.54 + __ srlx(G4, right_shift, G4); 3.55 + __ ba(L_main); 3.56 + __ delayed()->bset(G4, O5); 3.57 + 3.58 + // If 'data' is aligned, load normally 3.59 + __ BIND(L_aligned); 3.60 + __ ldx(data, 0, O4); 3.61 + __ ldx(data, 8, O5); 3.62 + 3.63 + __ BIND(L_main); 3.64 + __ ldx(subkeyH, 0, O2); 3.65 + __ ldx(subkeyH, 8, O3); 3.66 + 3.67 + 
__ xor3(O0, O4, O0); 3.68 + __ xor3(O1, O5, O1); 3.69 + 3.70 + __ xmulxhi(O0, O3, G3); 3.71 + __ xmulx(O0, O2, O5); 3.72 + __ xmulxhi(O1, O2, G4); 3.73 + __ xmulxhi(O1, O3, G5); 3.74 + __ xmulx(O0, O3, G1); 3.75 + __ xmulx(O1, O3, G2); 3.76 + __ xmulx(O1, O2, O3); 3.77 + __ xmulxhi(O0, O2, O4); 3.78 + 3.79 + __ mov(0xE1, O0); 3.80 + __ sllx(O0, 56, O0); 3.81 + 3.82 + __ xor3(O5, G3, O5); 3.83 + __ xor3(O5, G4, O5); 3.84 + __ xor3(G5, G1, G1); 3.85 + __ xor3(G1, O3, G1); 3.86 + __ srlx(G2, 63, O1); 3.87 + __ srlx(G1, 63, G3); 3.88 + __ sllx(G2, 63, O3); 3.89 + __ sllx(G2, 58, O2); 3.90 + __ xor3(O3, O2, O2); 3.91 + 3.92 + __ sllx(G1, 1, G1); 3.93 + __ or3(G1, O1, G1); 3.94 + 3.95 + __ xor3(G1, O2, G1); 3.96 + 3.97 + __ sllx(G2, 1, G2); 3.98 + 3.99 + __ xmulxhi(G1, O0, O1); 3.100 + __ xmulx(G1, O0, O2); 3.101 + __ xmulxhi(G2, O0, O3); 3.102 + __ xmulx(G2, O0, G1); 3.103 + 3.104 + __ xor3(O4, O1, O4); 3.105 + __ xor3(O5, O2, O5); 3.106 + __ xor3(O5, O3, O5); 3.107 + 3.108 + __ sllx(O4, 1, O2); 3.109 + __ srlx(O5, 63, O3); 3.110 + 3.111 + __ or3(O2, O3, O0); 3.112 + 3.113 + __ sllx(O5, 1, O1); 3.114 + __ srlx(G1, 63, O2); 3.115 + __ or3(O1, O2, O1); 3.116 + __ xor3(O1, G3, O1); 3.117 + 3.118 + __ deccc(len); 3.119 + __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop); 3.120 + __ delayed()->add(data, 16, data); 3.121 + 3.122 + __ stx(O0, I0, 0); 3.123 + __ stx(O1, I0, 8); 3.124 + 3.125 + __ ret(); 3.126 + __ delayed()->restore(); 3.127 + 3.128 + return start; 3.129 + } 3.130 + 3.131 void generate_initial() { 3.132 // Generates all stubs and initializes the entry points 3.133 3.134 @@ -4860,6 +4984,10 @@ 3.135 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); 3.136 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); 3.137 } 3.138 + // generate GHASH intrinsics code 3.139 + if (UseGHASHIntrinsics) { 3.140 + StubRoutines::_ghash_processBlocks = 
generate_ghash_processBlocks(); 3.141 + } 3.142 3.143 // generate SHA1/SHA256/SHA512 intrinsics code 3.144 if (UseSHA1Intrinsics) {
4.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Jul 31 14:28:51 2019 -0400 4.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Jun 17 17:48:25 2015 -0700 4.3 @@ -319,6 +319,17 @@ 4.4 } 4.5 } 4.6 4.7 + // GHASH/GCM intrinsics 4.8 + if (has_vis3() && (UseVIS > 2)) { 4.9 + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 4.10 + UseGHASHIntrinsics = true; 4.11 + } 4.12 + } else if (UseGHASHIntrinsics) { 4.13 + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 4.14 + warning("GHASH intrinsics require VIS3 instruction support. Intrinsics will be disabled"); 4.15 + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 4.16 + } 4.17 + 4.18 // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times 4.19 if (has_sha1() || has_sha256() || has_sha512()) { 4.20 if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
5.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Wed Jul 31 14:28:51 2019 -0400 5.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Jun 17 17:48:25 2015 -0700 5.3 @@ -2575,6 +2575,15 @@ 5.4 emit_int8(shift); 5.5 } 5.6 5.7 +void Assembler::pslldq(XMMRegister dst, int shift) { 5.8 + // Shift left 128 bit value in xmm register by number of bytes. 5.9 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 5.10 + int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66); 5.11 + emit_int8(0x73); 5.12 + emit_int8((unsigned char)(0xC0 | encode)); 5.13 + emit_int8(shift); 5.14 +} 5.15 + 5.16 void Assembler::ptest(XMMRegister dst, Address src) { 5.17 assert(VM_Version::supports_sse4_1(), ""); 5.18 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
6.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Wed Jul 31 14:28:51 2019 -0400 6.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Jun 17 17:48:25 2015 -0700 6.3 @@ -1527,6 +1527,8 @@ 6.4 6.5 // Shift Right by bytes Logical DoubleQuadword Immediate 6.6 void psrldq(XMMRegister dst, int shift); 6.7 + // Shift Left by bytes Logical DoubleQuadword Immediate 6.8 + void pslldq(XMMRegister dst, int shift); 6.9 6.10 // Logical Compare 128bit 6.11 void ptest(XMMRegister dst, XMMRegister src);
7.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jul 31 14:28:51 2019 -0400 7.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jun 17 17:48:25 2015 -0700 7.3 @@ -2719,6 +2719,167 @@ 7.4 return start; 7.5 } 7.6 7.7 + // byte swap x86 long 7.8 + address generate_ghash_long_swap_mask() { 7.9 + __ align(CodeEntryAlignment); 7.10 + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); 7.11 + address start = __ pc(); 7.12 + __ emit_data(0x0b0a0908, relocInfo::none, 0); 7.13 + __ emit_data(0x0f0e0d0c, relocInfo::none, 0); 7.14 + __ emit_data(0x03020100, relocInfo::none, 0); 7.15 + __ emit_data(0x07060504, relocInfo::none, 0); 7.16 + 7.17 + return start; 7.18 + } 7.19 + 7.20 + // byte swap x86 byte array 7.21 + address generate_ghash_byte_swap_mask() { 7.22 + __ align(CodeEntryAlignment); 7.23 + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); 7.24 + address start = __ pc(); 7.25 + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); 7.26 + __ emit_data(0x08090a0b, relocInfo::none, 0); 7.27 + __ emit_data(0x04050607, relocInfo::none, 0); 7.28 + __ emit_data(0x00010203, relocInfo::none, 0); 7.29 + return start; 7.30 + } 7.31 + 7.32 + /* Single and multi-block ghash operations */ 7.33 + address generate_ghash_processBlocks() { 7.34 + assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); 7.35 + __ align(CodeEntryAlignment); 7.36 + Label L_ghash_loop, L_exit; 7.37 + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); 7.38 + address start = __ pc(); 7.39 + 7.40 + const Register state = rdi; 7.41 + const Register subkeyH = rsi; 7.42 + const Register data = rdx; 7.43 + const Register blocks = rcx; 7.44 + 7.45 + const Address state_param(rbp, 8+0); 7.46 + const Address subkeyH_param(rbp, 8+4); 7.47 + const Address data_param(rbp, 8+8); 7.48 + const Address blocks_param(rbp, 8+12); 7.49 + 7.50 + const XMMRegister xmm_temp0 = xmm0; 7.51 + const XMMRegister xmm_temp1 = xmm1; 7.52 + const XMMRegister xmm_temp2 = xmm2; 7.53 + 
const XMMRegister xmm_temp3 = xmm3; 7.54 + const XMMRegister xmm_temp4 = xmm4; 7.55 + const XMMRegister xmm_temp5 = xmm5; 7.56 + const XMMRegister xmm_temp6 = xmm6; 7.57 + const XMMRegister xmm_temp7 = xmm7; 7.58 + 7.59 + __ enter(); 7.60 + 7.61 + __ movptr(state, state_param); 7.62 + __ movptr(subkeyH, subkeyH_param); 7.63 + __ movptr(data, data_param); 7.64 + __ movptr(blocks, blocks_param); 7.65 + 7.66 + __ movdqu(xmm_temp0, Address(state, 0)); 7.67 + __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); 7.68 + 7.69 + __ movdqu(xmm_temp1, Address(subkeyH, 0)); 7.70 + __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); 7.71 + 7.72 + __ BIND(L_ghash_loop); 7.73 + __ movdqu(xmm_temp2, Address(data, 0)); 7.74 + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); 7.75 + 7.76 + __ pxor(xmm_temp0, xmm_temp2); 7.77 + 7.78 + // 7.79 + // Multiply with the hash key 7.80 + // 7.81 + __ movdqu(xmm_temp3, xmm_temp0); 7.82 + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 7.83 + __ movdqu(xmm_temp4, xmm_temp0); 7.84 + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 7.85 + 7.86 + __ movdqu(xmm_temp5, xmm_temp0); 7.87 + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 7.88 + __ movdqu(xmm_temp6, xmm_temp0); 7.89 + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 7.90 + 7.91 + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 7.92 + 7.93 + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 7.94 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right 7.95 + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left 7.96 + __ pxor(xmm_temp3, xmm_temp5); 7.97 + __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result 7.98 + // of the carry-less multiplication of 7.99 + // xmm0 by xmm1. 
7.100 + 7.101 + // We shift the result of the multiplication by one bit position 7.102 + // to the left to cope for the fact that the bits are reversed. 7.103 + __ movdqu(xmm_temp7, xmm_temp3); 7.104 + __ movdqu(xmm_temp4, xmm_temp6); 7.105 + __ pslld (xmm_temp3, 1); 7.106 + __ pslld(xmm_temp6, 1); 7.107 + __ psrld(xmm_temp7, 31); 7.108 + __ psrld(xmm_temp4, 31); 7.109 + __ movdqu(xmm_temp5, xmm_temp7); 7.110 + __ pslldq(xmm_temp4, 4); 7.111 + __ pslldq(xmm_temp7, 4); 7.112 + __ psrldq(xmm_temp5, 12); 7.113 + __ por(xmm_temp3, xmm_temp7); 7.114 + __ por(xmm_temp6, xmm_temp4); 7.115 + __ por(xmm_temp6, xmm_temp5); 7.116 + 7.117 + // 7.118 + // First phase of the reduction 7.119 + // 7.120 + // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts 7.121 + // independently. 7.122 + __ movdqu(xmm_temp7, xmm_temp3); 7.123 + __ movdqu(xmm_temp4, xmm_temp3); 7.124 + __ movdqu(xmm_temp5, xmm_temp3); 7.125 + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 7.126 + __ pslld(xmm_temp4, 30); // packed right shift shifting << 30 7.127 + __ pslld(xmm_temp5, 25); // packed right shift shifting << 25 7.128 + __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions 7.129 + __ pxor(xmm_temp7, xmm_temp5); 7.130 + __ movdqu(xmm_temp4, xmm_temp7); 7.131 + __ pslldq(xmm_temp7, 12); 7.132 + __ psrldq(xmm_temp4, 4); 7.133 + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete 7.134 + 7.135 + // 7.136 + // Second phase of the reduction 7.137 + // 7.138 + // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these 7.139 + // shift operations. 
7.140 + __ movdqu(xmm_temp2, xmm_temp3); 7.141 + __ movdqu(xmm_temp7, xmm_temp3); 7.142 + __ movdqu(xmm_temp5, xmm_temp3); 7.143 + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 7.144 + __ psrld(xmm_temp7, 2); // packed left shifting >> 2 7.145 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 7.146 + __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions 7.147 + __ pxor(xmm_temp2, xmm_temp5); 7.148 + __ pxor(xmm_temp2, xmm_temp4); 7.149 + __ pxor(xmm_temp3, xmm_temp2); 7.150 + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 7.151 + 7.152 + __ decrement(blocks); 7.153 + __ jcc(Assembler::zero, L_exit); 7.154 + __ movdqu(xmm_temp0, xmm_temp6); 7.155 + __ addptr(data, 16); 7.156 + __ jmp(L_ghash_loop); 7.157 + 7.158 + __ BIND(L_exit); 7.159 + // Byte swap 16-byte result 7.160 + __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); 7.161 + __ movdqu(Address(state, 0), xmm_temp6); // store the result 7.162 + 7.163 + __ leave(); 7.164 + __ ret(0); 7.165 + return start; 7.166 + } 7.167 + 7.168 /** 7.169 * Arguments: 7.170 * 7.171 @@ -3018,6 +3179,13 @@ 7.172 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); 7.173 } 7.174 7.175 + // Generate GHASH intrinsics code 7.176 + if (UseGHASHIntrinsics) { 7.177 + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); 7.178 + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 7.179 + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 7.180 + } 7.181 + 7.182 // Safefetch stubs. 7.183 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 7.184 &StubRoutines::_safefetch32_fault_pc,
8.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jul 31 14:28:51 2019 -0400 8.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jun 17 17:48:25 2015 -0700 8.3 @@ -3639,6 +3639,175 @@ 8.4 return start; 8.5 } 8.6 8.7 + 8.8 + // byte swap x86 long 8.9 + address generate_ghash_long_swap_mask() { 8.10 + __ align(CodeEntryAlignment); 8.11 + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); 8.12 + address start = __ pc(); 8.13 + __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); 8.14 + __ emit_data64(0x0706050403020100, relocInfo::none ); 8.15 + return start; 8.16 + } 8.17 + 8.18 + // byte swap x86 byte array 8.19 + address generate_ghash_byte_swap_mask() { 8.20 + __ align(CodeEntryAlignment); 8.21 + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); 8.22 + address start = __ pc(); 8.23 + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); 8.24 + __ emit_data64(0x0001020304050607, relocInfo::none ); 8.25 + return start; 8.26 + } 8.27 + 8.28 + /* Single and multi-block ghash operations */ 8.29 + address generate_ghash_processBlocks() { 8.30 + __ align(CodeEntryAlignment); 8.31 + Label L_ghash_loop, L_exit; 8.32 + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); 8.33 + address start = __ pc(); 8.34 + 8.35 + const Register state = c_rarg0; 8.36 + const Register subkeyH = c_rarg1; 8.37 + const Register data = c_rarg2; 8.38 + const Register blocks = c_rarg3; 8.39 + 8.40 +#ifdef _WIN64 8.41 + const int XMM_REG_LAST = 10; 8.42 +#endif 8.43 + 8.44 + const XMMRegister xmm_temp0 = xmm0; 8.45 + const XMMRegister xmm_temp1 = xmm1; 8.46 + const XMMRegister xmm_temp2 = xmm2; 8.47 + const XMMRegister xmm_temp3 = xmm3; 8.48 + const XMMRegister xmm_temp4 = xmm4; 8.49 + const XMMRegister xmm_temp5 = xmm5; 8.50 + const XMMRegister xmm_temp6 = xmm6; 8.51 + const XMMRegister xmm_temp7 = xmm7; 8.52 + const XMMRegister xmm_temp8 = xmm8; 8.53 + const XMMRegister xmm_temp9 = xmm9; 8.54 + const XMMRegister xmm_temp10 = xmm10; 8.55 + 8.56 + __ 
enter(); 8.57 + 8.58 +#ifdef _WIN64 8.59 + // save the xmm registers which must be preserved 6-10 8.60 + __ subptr(rsp, -rsp_after_call_off * wordSize); 8.61 + for (int i = 6; i <= XMM_REG_LAST; i++) { 8.62 + __ movdqu(xmm_save(i), as_XMMRegister(i)); 8.63 + } 8.64 +#endif 8.65 + 8.66 + __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); 8.67 + 8.68 + __ movdqu(xmm_temp0, Address(state, 0)); 8.69 + __ pshufb(xmm_temp0, xmm_temp10); 8.70 + 8.71 + 8.72 + __ BIND(L_ghash_loop); 8.73 + __ movdqu(xmm_temp2, Address(data, 0)); 8.74 + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); 8.75 + 8.76 + __ movdqu(xmm_temp1, Address(subkeyH, 0)); 8.77 + __ pshufb(xmm_temp1, xmm_temp10); 8.78 + 8.79 + __ pxor(xmm_temp0, xmm_temp2); 8.80 + 8.81 + // 8.82 + // Multiply with the hash key 8.83 + // 8.84 + __ movdqu(xmm_temp3, xmm_temp0); 8.85 + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 8.86 + __ movdqu(xmm_temp4, xmm_temp0); 8.87 + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 8.88 + 8.89 + __ movdqu(xmm_temp5, xmm_temp0); 8.90 + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 8.91 + __ movdqu(xmm_temp6, xmm_temp0); 8.92 + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 8.93 + 8.94 + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 8.95 + 8.96 + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 8.97 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right 8.98 + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left 8.99 + __ pxor(xmm_temp3, xmm_temp5); 8.100 + __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result 8.101 + // of the carry-less multiplication of 8.102 + // xmm0 by xmm1. 8.103 + 8.104 + // We shift the result of the multiplication by one bit position 8.105 + // to the left to cope for the fact that the bits are reversed. 
8.106 + __ movdqu(xmm_temp7, xmm_temp3); 8.107 + __ movdqu(xmm_temp8, xmm_temp6); 8.108 + __ pslld(xmm_temp3, 1); 8.109 + __ pslld(xmm_temp6, 1); 8.110 + __ psrld(xmm_temp7, 31); 8.111 + __ psrld(xmm_temp8, 31); 8.112 + __ movdqu(xmm_temp9, xmm_temp7); 8.113 + __ pslldq(xmm_temp8, 4); 8.114 + __ pslldq(xmm_temp7, 4); 8.115 + __ psrldq(xmm_temp9, 12); 8.116 + __ por(xmm_temp3, xmm_temp7); 8.117 + __ por(xmm_temp6, xmm_temp8); 8.118 + __ por(xmm_temp6, xmm_temp9); 8.119 + 8.120 + // 8.121 + // First phase of the reduction 8.122 + // 8.123 + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts 8.124 + // independently. 8.125 + __ movdqu(xmm_temp7, xmm_temp3); 8.126 + __ movdqu(xmm_temp8, xmm_temp3); 8.127 + __ movdqu(xmm_temp9, xmm_temp3); 8.128 + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 8.129 + __ pslld(xmm_temp8, 30); // packed right shift shifting << 30 8.130 + __ pslld(xmm_temp9, 25); // packed right shift shifting << 25 8.131 + __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions 8.132 + __ pxor(xmm_temp7, xmm_temp9); 8.133 + __ movdqu(xmm_temp8, xmm_temp7); 8.134 + __ pslldq(xmm_temp7, 12); 8.135 + __ psrldq(xmm_temp8, 4); 8.136 + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete 8.137 + 8.138 + // 8.139 + // Second phase of the reduction 8.140 + // 8.141 + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these 8.142 + // shift operations. 
8.143 + __ movdqu(xmm_temp2, xmm_temp3); 8.144 + __ movdqu(xmm_temp4, xmm_temp3); 8.145 + __ movdqu(xmm_temp5, xmm_temp3); 8.146 + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 8.147 + __ psrld(xmm_temp4, 2); // packed left shifting >> 2 8.148 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 8.149 + __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions 8.150 + __ pxor(xmm_temp2, xmm_temp5); 8.151 + __ pxor(xmm_temp2, xmm_temp8); 8.152 + __ pxor(xmm_temp3, xmm_temp2); 8.153 + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 8.154 + 8.155 + __ decrement(blocks); 8.156 + __ jcc(Assembler::zero, L_exit); 8.157 + __ movdqu(xmm_temp0, xmm_temp6); 8.158 + __ addptr(data, 16); 8.159 + __ jmp(L_ghash_loop); 8.160 + 8.161 + __ BIND(L_exit); 8.162 + __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result 8.163 + __ movdqu(Address(state, 0), xmm_temp6); // store the result 8.164 + 8.165 +#ifdef _WIN64 8.166 + // restore xmm regs belonging to calling function 8.167 + for (int i = 6; i <= XMM_REG_LAST; i++) { 8.168 + __ movdqu(as_XMMRegister(i), xmm_save(i)); 8.169 + } 8.170 +#endif 8.171 + __ leave(); 8.172 + __ ret(0); 8.173 + return start; 8.174 + } 8.175 + 8.176 /** 8.177 * Arguments: 8.178 * 8.179 @@ -4077,6 +4246,13 @@ 8.180 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); 8.181 } 8.182 8.183 + // Generate GHASH intrinsics code 8.184 + if (UseGHASHIntrinsics) { 8.185 + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); 8.186 + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 8.187 + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 8.188 + } 8.189 + 8.190 // Safefetch stubs. 8.191 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 8.192 &StubRoutines::_safefetch32_fault_pc,
9.1 --- a/src/cpu/x86/vm/stubRoutines_x86.cpp Wed Jul 31 14:28:51 2019 -0400 9.2 +++ b/src/cpu/x86/vm/stubRoutines_x86.cpp Wed Jun 17 17:48:25 2015 -0700 9.3 @@ -1,5 +1,5 @@ 9.4 /* 9.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 9.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 9.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 9.8 * 9.9 * This code is free software; you can redistribute it and/or modify it 9.10 @@ -33,6 +33,8 @@ 9.11 9.12 address StubRoutines::x86::_verify_mxcsr_entry = NULL; 9.13 address StubRoutines::x86::_key_shuffle_mask_addr = NULL; 9.14 +address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; 9.15 +address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; 9.16 9.17 uint64_t StubRoutines::x86::_crc_by128_masks[] = 9.18 {
10.1 --- a/src/cpu/x86/vm/stubRoutines_x86.hpp Wed Jul 31 14:28:51 2019 -0400 10.2 +++ b/src/cpu/x86/vm/stubRoutines_x86.hpp Wed Jun 17 17:48:25 2015 -0700 10.3 @@ -1,5 +1,5 @@ 10.4 /* 10.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 10.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 10.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 10.8 * 10.9 * This code is free software; you can redistribute it and/or modify it 10.10 @@ -36,10 +36,15 @@ 10.11 // masks and table for CRC32 10.12 static uint64_t _crc_by128_masks[]; 10.13 static juint _crc_table[]; 10.14 + // swap mask for ghash 10.15 + static address _ghash_long_swap_mask_addr; 10.16 + static address _ghash_byte_swap_mask_addr; 10.17 10.18 public: 10.19 static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } 10.20 static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } 10.21 static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } 10.22 + static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } 10.23 + static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } 10.24 10.25 #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
11.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Wed Jul 31 14:28:51 2019 -0400 11.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed Jun 17 17:48:25 2015 -0700 11.3 @@ -594,6 +594,17 @@ 11.4 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 11.5 } 11.6 11.7 + // GHASH/GCM intrinsics 11.8 + if (UseCLMUL && (UseSSE > 2)) { 11.9 + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 11.10 + UseGHASHIntrinsics = true; 11.11 + } 11.12 + } else if (UseGHASHIntrinsics) { 11.13 + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 11.14 + warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 11.15 + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 11.16 + } 11.17 + 11.18 if (UseSHA) { 11.19 warning("SHA instructions are not available on this CPU"); 11.20 FLAG_SET_DEFAULT(UseSHA, false);
12.1 --- a/src/share/vm/classfile/vmSymbols.hpp Wed Jul 31 14:28:51 2019 -0400 12.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Wed Jun 17 17:48:25 2015 -0700 12.3 @@ -863,6 +863,12 @@ 12.4 do_name( implCompressMB_name, "implCompressMultiBlock0") \ 12.5 do_signature(implCompressMB_signature, "([BII)I") \ 12.6 \ 12.7 + /* support for com.sun.crypto.provider.GHASH */ \ 12.8 + do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \ 12.9 + do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \ 12.10 + do_name(processBlocks_name, "processBlocks") \ 12.11 + do_signature(ghash_processBlocks_signature, "([BII[J[J)V") \ 12.12 + \ 12.13 /* support for java.util.zip */ \ 12.14 do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \ 12.15 do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
13.1 --- a/src/share/vm/opto/escape.cpp Wed Jul 31 14:28:51 2019 -0400 13.2 +++ b/src/share/vm/opto/escape.cpp Wed Jun 17 17:48:25 2015 -0700 13.3 @@ -952,6 +952,7 @@ 13.4 strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || 13.5 strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || 13.6 strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 || 13.7 + strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 || 13.8 strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 || 13.9 strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 || 13.10 strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
14.1 --- a/src/share/vm/opto/library_call.cpp Wed Jul 31 14:28:51 2019 -0400 14.2 +++ b/src/share/vm/opto/library_call.cpp Wed Jun 17 17:48:25 2015 -0700 14.3 @@ -311,6 +311,7 @@ 14.4 Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); 14.5 Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); 14.6 Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object); 14.7 + bool inline_ghash_processBlocks(); 14.8 bool inline_sha_implCompress(vmIntrinsics::ID id); 14.9 bool inline_digestBase_implCompressMB(int predicate); 14.10 bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA, 14.11 @@ -570,6 +571,10 @@ 14.12 predicates = 3; 14.13 break; 14.14 14.15 + case vmIntrinsics::_ghash_processBlocks: 14.16 + if (!UseGHASHIntrinsics) return NULL; 14.17 + break; 14.18 + 14.19 case vmIntrinsics::_updateCRC32: 14.20 case vmIntrinsics::_updateBytesCRC32: 14.21 case vmIntrinsics::_updateByteBufferCRC32: 14.22 @@ -957,6 +962,9 @@ 14.23 case vmIntrinsics::_montgomerySquare: 14.24 return inline_montgomerySquare(); 14.25 14.26 + case vmIntrinsics::_ghash_processBlocks: 14.27 + return inline_ghash_processBlocks(); 14.28 + 14.29 case vmIntrinsics::_encodeISOArray: 14.30 return inline_encodeISOArray(); 14.31 14.32 @@ -6599,6 +6607,35 @@ 14.33 return _gvn.transform(region); 14.34 } 14.35 14.36 +//------------------------------inline_ghash_processBlocks 14.37 +bool LibraryCallKit::inline_ghash_processBlocks() { 14.38 + address stubAddr; 14.39 + const char *stubName; 14.40 + assert(UseGHASHIntrinsics, "need GHASH intrinsics support"); 14.41 + 14.42 + stubAddr = StubRoutines::ghash_processBlocks(); 14.43 + stubName = "ghash_processBlocks"; 14.44 + 14.45 + Node* data = argument(0); 14.46 + Node* offset = argument(1); 14.47 + Node* len = argument(2); 14.48 + Node* state = argument(3); 14.49 + Node* subkeyH = argument(4); 14.50 + 14.51 + Node* state_start = array_element_address(state, intcon(0), T_LONG); 14.52 + 
assert(state_start, "state is NULL"); 14.53 + Node* subkeyH_start = array_element_address(subkeyH, intcon(0), T_LONG); 14.54 + assert(subkeyH_start, "subkeyH is NULL"); 14.55 + Node* data_start = array_element_address(data, offset, T_BYTE); 14.56 + assert(data_start, "data is NULL"); 14.57 + 14.58 + Node* ghash = make_runtime_call(RC_LEAF|RC_NO_FP, 14.59 + OptoRuntime::ghash_processBlocks_Type(), 14.60 + stubAddr, stubName, TypePtr::BOTTOM, 14.61 + state_start, subkeyH_start, data_start, len); 14.62 + return true; 14.63 +} 14.64 + 14.65 //------------------------------inline_sha_implCompress----------------------- 14.66 // 14.67 // Calculate SHA (i.e., SHA-1) for single-block byte[] array.
15.1 --- a/src/share/vm/opto/runtime.cpp Wed Jul 31 14:28:51 2019 -0400 15.2 +++ b/src/share/vm/opto/runtime.cpp Wed Jun 17 17:48:25 2015 -0700 15.3 @@ -92,7 +92,25 @@ 15.4 // At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000 15.5 15.6 15.7 +// GHASH block processing 15.8 +const TypeFunc* OptoRuntime::ghash_processBlocks_Type() { 15.9 + int argcnt = 4; 15.10 15.11 + const Type** fields = TypeTuple::fields(argcnt); 15.12 + int argp = TypeFunc::Parms; 15.13 + fields[argp++] = TypePtr::NOTNULL; // state 15.14 + fields[argp++] = TypePtr::NOTNULL; // subkeyH 15.15 + fields[argp++] = TypePtr::NOTNULL; // data 15.16 + fields[argp++] = TypeInt::INT; // blocks 15.17 + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); 15.18 + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); 15.19 + 15.20 + // result type needed 15.21 + fields = TypeTuple::fields(1); 15.22 + fields[TypeFunc::Parms+0] = NULL; // void 15.23 + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); 15.24 + return TypeFunc::make(domain, range); 15.25 +} 15.26 15.27 // Compiled code entry points 15.28 address OptoRuntime::_new_instance_Java = NULL;
16.1 --- a/src/share/vm/opto/runtime.hpp Wed Jul 31 14:28:51 2019 -0400 16.2 +++ b/src/share/vm/opto/runtime.hpp Wed Jun 17 17:48:25 2015 -0700 16.3 @@ -311,6 +311,8 @@ 16.4 static const TypeFunc* montgomeryMultiply_Type(); 16.5 static const TypeFunc* montgomerySquare_Type(); 16.6 16.7 + static const TypeFunc* ghash_processBlocks_Type(); 16.8 + 16.9 static const TypeFunc* updateBytesCRC32_Type(); 16.10 16.11 // leaf on stack replacement interpreter accessor types
17.1 --- a/src/share/vm/runtime/globals.hpp Wed Jul 31 14:28:51 2019 -0400 17.2 +++ b/src/share/vm/runtime/globals.hpp Wed Jun 17 17:48:25 2015 -0700 17.3 @@ -602,6 +602,9 @@ 17.4 product(bool, UseSHA, false, \ 17.5 "Control whether SHA instructions can be used on SPARC") \ 17.6 \ 17.7 + product(bool, UseGHASHIntrinsics, false, \ 17.8 + "Use intrinsics for GHASH versions of crypto") \ 17.9 + \ 17.10 product(uintx, LargePageSizeInBytes, 0, \ 17.11 "Large page size (0 to let VM choose the page size)") \ 17.12 \
18.1 --- a/src/share/vm/runtime/stubRoutines.cpp Wed Jul 31 14:28:51 2019 -0400 18.2 +++ b/src/share/vm/runtime/stubRoutines.cpp Wed Jun 17 17:48:25 2015 -0700 18.3 @@ -124,6 +124,7 @@ 18.4 address StubRoutines::_aescrypt_decryptBlock = NULL; 18.5 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL; 18.6 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; 18.7 +address StubRoutines::_ghash_processBlocks = NULL; 18.8 18.9 address StubRoutines::_sha1_implCompress = NULL; 18.10 address StubRoutines::_sha1_implCompressMB = NULL;
19.1 --- a/src/share/vm/runtime/stubRoutines.hpp Wed Jul 31 14:28:51 2019 -0400 19.2 +++ b/src/share/vm/runtime/stubRoutines.hpp Wed Jun 17 17:48:25 2015 -0700 19.3 @@ -197,6 +197,7 @@ 19.4 static address _aescrypt_decryptBlock; 19.5 static address _cipherBlockChaining_encryptAESCrypt; 19.6 static address _cipherBlockChaining_decryptAESCrypt; 19.7 + static address _ghash_processBlocks; 19.8 19.9 static address _sha1_implCompress; 19.10 static address _sha1_implCompressMB; 19.11 @@ -359,6 +360,7 @@ 19.12 static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; } 19.13 static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; } 19.14 static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } 19.15 + static address ghash_processBlocks() { return _ghash_processBlocks; } 19.16 19.17 static address sha1_implCompress() { return _sha1_implCompress; } 19.18 static address sha1_implCompressMB() { return _sha1_implCompressMB; }
20.1 --- a/src/share/vm/runtime/vmStructs.cpp Wed Jul 31 14:28:51 2019 -0400 20.2 +++ b/src/share/vm/runtime/vmStructs.cpp Wed Jun 17 17:48:25 2015 -0700 20.3 @@ -810,6 +810,7 @@ 20.4 static_field(StubRoutines, _aescrypt_decryptBlock, address) \ 20.5 static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \ 20.6 static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ 20.7 + static_field(StubRoutines, _ghash_processBlocks, address) \ 20.8 static_field(StubRoutines, _updateBytesCRC32, address) \ 20.9 static_field(StubRoutines, _crc_table_adr, address) \ 20.10 static_field(StubRoutines, _multiplyToLen, address) \
21.1 --- a/test/compiler/7184394/TestAESBase.java Wed Jul 31 14:28:51 2019 -0400 21.2 +++ b/test/compiler/7184394/TestAESBase.java Wed Jun 17 17:48:25 2015 -0700 21.3 @@ -29,6 +29,7 @@ 21.4 import javax.crypto.Cipher; 21.5 import javax.crypto.KeyGenerator; 21.6 import javax.crypto.SecretKey; 21.7 +import javax.crypto.spec.GCMParameterSpec; 21.8 import javax.crypto.spec.IvParameterSpec; 21.9 import javax.crypto.spec.SecretKeySpec; 21.10 import java.security.AlgorithmParameters; 21.11 @@ -64,6 +65,10 @@ 21.12 Cipher dCipher; 21.13 AlgorithmParameters algParams; 21.14 SecretKey key; 21.15 + GCMParameterSpec gcm_spec; 21.16 + byte[] aad; 21.17 + int tlen = 12; 21.18 + byte[] iv; 21.19 21.20 static int numThreads = 0; 21.21 int threadId; 21.22 @@ -102,6 +107,12 @@ 21.23 int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0); 21.24 IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); 21.25 cipher.init(Cipher.ENCRYPT_MODE, key, initVector); 21.26 + } else if (mode.equals("GCM")) { 21.27 + iv = new byte[64]; 21.28 + random.nextBytes(iv); 21.29 + aad = new byte[5]; 21.30 + random.nextBytes(aad); 21.31 + gcm_init(); 21.32 } else { 21.33 algParams = cipher.getParameters(); 21.34 cipher.init(Cipher.ENCRYPT_MODE, key, algParams); 21.35 @@ -188,4 +199,12 @@ 21.36 } 21.37 21.38 abstract void childShowCipher(); 21.39 + 21.40 + void gcm_init() throws Exception { 21.41 + tlen = 12; 21.42 + gcm_spec = new GCMParameterSpec(tlen * 8, iv); 21.43 + cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE"); 21.44 + cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec); 21.45 + cipher.update(aad); 21.46 + } 21.47 }
22.1 --- a/test/compiler/7184394/TestAESEncode.java Wed Jul 31 14:28:51 2019 -0400 22.2 +++ b/test/compiler/7184394/TestAESEncode.java Wed Jun 17 17:48:25 2015 -0700 22.3 @@ -1,5 +1,5 @@ 22.4 /* 22.5 - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 22.6 + * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. 22.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 22.8 * 22.9 * This code is free software; you can redistribute it and/or modify it 22.10 @@ -32,7 +32,11 @@ 22.11 @Override 22.12 public void run() { 22.13 try { 22.14 - if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams); 22.15 + if (mode.equals("GCM")) { 22.16 + gcm_init(); 22.17 + } else if (!noReinit) { 22.18 + cipher.init(Cipher.ENCRYPT_MODE, key, algParams); 22.19 + } 22.20 encode = new byte[encodeLength]; 22.21 if (testingMisalignment) { 22.22 int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
23.1 --- a/test/compiler/7184394/TestAESMain.java Wed Jul 31 14:28:51 2019 -0400 23.2 +++ b/test/compiler/7184394/TestAESMain.java Wed Jun 17 17:48:25 2015 -0700 23.3 @@ -41,6 +41,13 @@ 23.4 * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain 23.5 * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain 23.6 * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain 23.7 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain 23.8 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain 23.9 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain 23.10 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain 23.11 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain 23.12 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain 23.13 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain 23.14 * 23.15 * @author Tom Deneau 23.16 */