// Reconstructed from a line-collapsed diff of
//   a/src/cpu/sparc/vm/stubGenerator_sparc.cpp  (Tue Feb 04 17:38:01 2020 +0800)
//   b/src/cpu/sparc/vm/stubGenerator_sparc.cpp  (Tue Feb 04 18:13:14 2020 +0800)
// Diff gutter numbers and '+' markers are dropped; hunk boundaries are noted below.

// ---- hunk @@ -4788,6 +4788,130 @@ ----
// (tail of the preceding stub generator, shown only as diff context)
    return start;
  }

  /* Single and multi-block ghash operations */
  //
  // Emits the "ghash_processBlocks" stub and returns the address of its first
  // instruction.
  //
  // Incoming arguments, as bound to registers below:
  //   I0 (state)   - address of two 64-bit words; loaded once on entry and
  //                  written back once after the last iteration
  //   I1 (subkeyH) - address of two 64-bit words; re-read on every iteration
  //   I2 (data)    - input address, any byte alignment; advanced by 16 bytes
  //                  per iteration
  //   I3 (len)     - iteration count, i.e. the number of 16-byte blocks
  //
  // NOTE(review): len is decremented and tested only after a block has been
  // processed, so the stub appears to assume len >= 1 -- confirm that callers
  // guarantee this.
  // NOTE(review): xmulx / xmulxhi are presumably the low / high 64 bits of a
  // carry-less (XOR) 64x64 multiply -- confirm against the SPARC assembler
  // definitions. The comments below use lo(a*b) / hi(a*b) for their results.
  address generate_ghash_processBlocks() {
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_aligned, L_main;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    Register state = I0;
    Register subkeyH = I1;
    Register data = I2;
    Register len = I3;

    __ save_frame(0);

    // Running state lives in O0 (word at offset 0) and O1 (word at offset 8)
    // for the whole loop.
    __ ldx(state, 0, O0);
    __ ldx(state, 8, O1);

    // Loop label for multiblock operations
    __ BIND(L_ghash_loop);

    // Check if 'data' is unaligned
    // G1 = data & 7 (byte offset within an 8-byte word); also sets the
    // condition codes tested by the branch.
    __ andcc(data, 7, G1);
    __ br(Assembler::zero, false, Assembler::pt, L_aligned);
    __ delayed()->nop();

    Register left_shift = L1;
    Register right_shift = L2;
    Register data_ptr = L3;

    // Get left and right shift values in bits
    // left_shift = byte offset scaled by LogBitsPerByte; right_shift = 64 - left_shift.
    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    // Align to read 'data'
    // data_ptr = data rounded down to an 8-byte boundary. The block is then
    // assembled from the three words at data_ptr + 0, + 8 and + 16.
    __ sub(data, G1, data_ptr);

    // Load first 8 bytes of 'data'
    // O4 = (word0 << left_shift) merged with (word1 >> right_shift).
    __ ldx(data_ptr, 0, O4);
    __ sllx(O4, left_shift, O4);
    __ ldx(data_ptr, 8, O5);
    __ srlx(O5, right_shift, G4);
    __ bset(G4, O4);

    // Load second 8 bytes of 'data'
    // O5 = (word1 << left_shift) merged with (word2 >> right_shift); the final
    // merge sits in the delay slot of the branch to L_main.
    __ sllx(O5, left_shift, O5);
    __ ldx(data_ptr, 16, G4);
    __ srlx(G4, right_shift, G4);
    __ ba(L_main);
    __ delayed()->bset(G4, O5);

    // If 'data' is aligned, load normally
    __ BIND(L_aligned);
    __ ldx(data, 0, O4);
    __ ldx(data, 8, O5);

    // Both paths arrive here with the current 16-byte block in O4:O5.
    __ BIND(L_main);
    // O2/O3 are overwritten further down, which is why subkeyH is re-read
    // on every iteration.
    __ ldx(subkeyH, 0, O2);
    __ ldx(subkeyH, 8, O3);

    // Fold the block into the running state.
    __ xor3(O0, O4, O0);
    __ xor3(O1, O5, O1);

    // Eight partial products of state (O0, O1) with subkeyH (O2, O3).
    // O5 and O3 are reused as destinations, so the order of these
    // instructions matters: every read of O3 as the subkeyH word precedes
    // the write to O3.
    __ xmulxhi(O0, O3, G3);   // G3 = hi(O0*O3)
    __ xmulx(O0, O2, O5);     // O5 = lo(O0*O2)
    __ xmulxhi(O1, O2, G4);   // G4 = hi(O1*O2)
    __ xmulxhi(O1, O3, G5);   // G5 = hi(O1*O3)
    __ xmulx(O0, O3, G1);     // G1 = lo(O0*O3)
    __ xmulx(O1, O3, G2);     // G2 = lo(O1*O3)
    __ xmulx(O1, O2, O3);     // O3 = lo(O1*O2)
    __ xmulxhi(O0, O2, O4);   // O4 = hi(O0*O2)

    // O0 = 0xE1 << 56. The state is safe to overwrite here because it has
    // been fully consumed by the multiplies above.
    // NOTE(review): presumably the GHASH reduction constant -- confirm.
    __ mov(0xE1, O0);
    __ sllx(O0, 56, O0);

    // Combine the partial products into four 64-bit words O4, O5, G1, G2:
    //   O5 = lo(O0*O2) ^ hi(O0*O3) ^ hi(O1*O2)
    //   G1 = hi(O1*O3) ^ lo(O0*O3) ^ lo(O1*O2)
    __ xor3(O5, G3, O5);
    __ xor3(O5, G4, O5);
    __ xor3(G5, G1, G1);
    __ xor3(G1, O3, G1);
    // Save the top bits of G2 and G1 before they are shifted out below:
    // O1 carries G2's top bit into G1, and G3 (G1's top bit) is XORed into
    // the final O1.
    __ srlx(G2, 63, O1);
    __ srlx(G1, 63, G3);
    // O2 = (G2 << 63) ^ (G2 << 58), a correction term XORed into G1 below.
    __ sllx(G2, 63, O3);
    __ sllx(G2, 58, O2);
    __ xor3(O3, O2, O2);

    // Shift G1:G2 left by one bit as a pair, then apply the correction term.
    __ sllx(G1, 1, G1);
    __ or3(G1, O1, G1);

    __ xor3(G1, O2, G1);

    __ sllx(G2, 1, G2);

    // Multiply the two adjusted words by the constant in O0 ...
    __ xmulxhi(G1, O0, O1);
    __ xmulx(G1, O0, O2);
    __ xmulxhi(G2, O0, O3);
    __ xmulx(G2, O0, G1);

    // ... and XOR the results into O4:O5.
    __ xor3(O4, O1, O4);
    __ xor3(O5, O2, O5);
    __ xor3(O5, O3, O5);

    // New state word 0: O0 = (O4 << 1) | (O5 >> 63).
    __ sllx(O4, 1, O2);
    __ srlx(O5, 63, O3);

    __ or3(O2, O3, O0);

    // New state word 1: O1 = ((O5 << 1) | (G1 >> 63)) ^ G3.
    __ sllx(O5, 1, O1);
    __ srlx(G1, 63, O2);
    __ or3(O1, O2, O1);
    __ xor3(O1, G3, O1);

    // One block done: decrement the count and loop while it is non-zero.
    // The delay slot advances 'data' to the next 16-byte block.
    // NOTE(review): the 'true' argument is presumably the annul bit, in which
    // case the delay-slot add runs only when the branch is taken -- confirm.
    __ deccc(len);
    __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
    __ delayed()->add(data, 16, data);

    // Write the updated state back through I0 (the 'state' register).
    __ stx(O0, I0, 0);
    __ stx(O1, I0, 8);

    __ ret();
    __ delayed()->restore();

    return start;
  }

  void generate_initial() {
    // Generates all stubs and initializes the entry points

// ---- hunk @@ -4860,6 +4984,10 @@ ----
// (lines between the two hunks are not part of this excerpt)
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
    }
    // generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }

    // generate SHA1/SHA256/SHA512 intrinsics code
    if (UseSHA1Intrinsics) {