src/cpu/sparc/vm/stubGenerator_sparc.cpp

changeset 9806
758c07667682
parent 9703
2fdf635bcf28
parent 9788
44ef77ad417c
     1.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Feb 04 17:38:01 2020 +0800
     1.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Feb 04 18:13:14 2020 +0800
     1.3 @@ -4788,6 +4788,130 @@
     1.4      return start;
     1.5    }
     1.6  
     1.7 +  /* GHASH intrinsic (AES-GCM hash): folds 'len' 16-byte blocks of 'data' into the 128-bit 'state' using hash subkey 'subkeyH'; single- and multi-block. */
     1.8 +  address generate_ghash_processBlocks() {
     1.9 +      __ align(CodeEntryAlignment);
    1.10 +      Label L_ghash_loop, L_aligned, L_main;
    1.11 +      StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    1.12 +      address start = __ pc();
    1.13 +
    1.14 +      Register state = I0;    // in/out: 128-bit state, stored as two 64-bit words
    1.15 +      Register subkeyH = I1;  // in: 16-byte hash subkey H
    1.16 +      Register data = I2;     // in: input blocks; advanced by 16 each iteration
    1.17 +      Register len = I3;      // in: number of 16-byte blocks to process (assumed >= 1 -- loop is bottom-tested)
    1.18 +
    1.19 +      __ save_frame(0);       // new register window; caller's O0..O3 args now visible as I0..I3
    1.20 +
    1.21 +      __ ldx(state, 0, O0);   // O0 = state[0..7]
    1.22 +      __ ldx(state, 8, O1);   // O1 = state[8..15]; O0:O1 carry the state across loop iterations
    1.23 +
    1.24 +      // Loop label for multiblock operations
    1.25 +      __ BIND(L_ghash_loop);
    1.26 +
    1.27 +      // Check if 'data' is unaligned
    1.28 +      __ andcc(data, 7, G1);  // G1 = data & 7 (byte misalignment within an 8-byte word)
    1.29 +      __ br(Assembler::zero, false, Assembler::pt, L_aligned);
    1.30 +      __ delayed()->nop();
    1.31 +
    1.32 +      Register left_shift = L1;
    1.33 +      Register right_shift = L2;
    1.34 +      Register data_ptr = L3;
    1.35 +
    1.36 +      // Get left and right shift values in bits
    1.37 +      __ sll(G1, LogBitsPerByte, left_shift);   // left_shift = misalignment * 8
    1.38 +      __ mov(64, right_shift);
    1.39 +      __ sub(right_shift, left_shift, right_shift);  // right_shift = 64 - left_shift
    1.40 +
    1.41 +      // Align to read 'data'
    1.42 +      __ sub(data, G1, data_ptr);  // data_ptr = data rounded down to an 8-byte boundary
    1.43 +
    1.44 +      // Load first 8 bytes of 'data': O4 = (word0 << left_shift) | (word1 >> right_shift)
    1.45 +      __ ldx(data_ptr, 0, O4);
    1.46 +      __ sllx(O4, left_shift, O4);
    1.47 +      __ ldx(data_ptr, 8, O5);
    1.48 +      __ srlx(O5, right_shift, G4);
    1.49 +      __ bset(G4, O4);             // bset = or: merge the two word fragments
    1.50 +
    1.51 +      // Load second 8 bytes of 'data': O5 = (word1 << left_shift) | (word2 >> right_shift)
    1.52 +      __ sllx(O5, left_shift, O5);
    1.53 +      __ ldx(data_ptr, 16, G4);    // NOTE(review): reads the next aligned word, up to 7 bytes past this 16-byte block -- assumed safe for callers; confirm
    1.54 +      __ srlx(G4, right_shift, G4);
    1.55 +      __ ba(L_main);
    1.56 +      __ delayed()->bset(G4, O5);  // merge completes in the branch delay slot
    1.57 +
    1.58 +      // If 'data' is aligned, load normally
    1.59 +      __ BIND(L_aligned);
    1.60 +      __ ldx(data, 0, O4);
    1.61 +      __ ldx(data, 8, O5);
    1.62 +
    1.63 +      __ BIND(L_main);
    1.64 +      __ ldx(subkeyH, 0, O2);      // O2:O3 = subkey H
    1.65 +      __ ldx(subkeyH, 8, O3);
    1.66 +
    1.67 +      __ xor3(O0, O4, O0);         // state ^= data block (GHASH: X = X xor C)
    1.68 +      __ xor3(O1, O5, O1);
    1.69 +
    1.70 +      __ xmulxhi(O0, O3, G3);      // 128x128 carry-less multiply (state * H) via four 64x64 products; xmulx/xmulxhi (VIS3) give low/high 64 bits of the XOR product
    1.71 +      __ xmulx(O0, O2, O5);
    1.72 +      __ xmulxhi(O1, O2, G4);
    1.73 +      __ xmulxhi(O1, O3, G5);
    1.74 +      __ xmulx(O0, O3, G1);
    1.75 +      __ xmulx(O1, O3, G2);
    1.76 +      __ xmulx(O1, O2, O3);
    1.77 +      __ xmulxhi(O0, O2, O4);
    1.78 +
    1.79 +      __ mov(0xE1, O0);            // GHASH reduction constant: 0xE1 << 56 (top byte of the field polynomial)
    1.80 +      __ sllx(O0, 56, O0);
    1.81 +
    1.82 +      __ xor3(O5, G3, O5);         // fold the cross partial products into the 256-bit result
    1.83 +      __ xor3(O5, G4, O5);
    1.84 +      __ xor3(G5, G1, G1);
    1.85 +      __ xor3(G1, O3, G1);
    1.86 +      __ srlx(G2, 63, O1);
    1.87 +      __ srlx(G1, 63, G3);         // G3 = top bit carried into the final result below
    1.88 +      __ sllx(G2, 63, O3);
    1.89 +      __ sllx(G2, 58, O2);
    1.90 +      __ xor3(O3, O2, O2);
    1.91 +
    1.92 +      __ sllx(G1, 1, G1);          // shift low 128 bits left by 1 (bit-order adjustment)
    1.93 +      __ or3(G1, O1, G1);
    1.94 +
    1.95 +      __ xor3(G1, O2, G1);
    1.96 +
    1.97 +      __ sllx(G2, 1, G2);
    1.98 +
    1.99 +      __ xmulxhi(G1, O0, O1);      // reduce: carry-less multiply the overflow words by the polynomial constant
    1.100 +      __ xmulx(G1, O0, O2);
    1.101 +      __ xmulxhi(G2, O0, O3);
    1.102 +      __ xmulx(G2, O0, G1);
    1.103 +
    1.104 +      __ xor3(O4, O1, O4);        // fold reduction terms back into the high 128 bits
    1.105 +      __ xor3(O5, O2, O5);
    1.106 +      __ xor3(O5, O3, O5);
    1.107 +
    1.108 +      __ sllx(O4, 1, O2);         // final shift-left-by-1 across the 128-bit result
    1.109 +      __ srlx(O5, 63, O3);
    1.110 +
    1.111 +      __ or3(O2, O3, O0);         // O0 = new state[0..7]
    1.112 +
    1.113 +      __ sllx(O5, 1, O1);
    1.114 +      __ srlx(G1, 63, O2);
    1.115 +      __ or3(O1, O2, O1);
    1.116 +      __ xor3(O1, G3, O1);        // O1 = new state[8..15]; O0:O1 feed the next iteration's xor
    1.117 +
    1.118 +      __ deccc(len);              // --len, setting condition codes
    1.119 +      __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
    1.120 +      __ delayed()->add(data, 16, data);  // annulled delay slot: data += 16 executes only when the branch is taken
    1.121 +
    1.122 +      __ stx(O0, I0, 0);          // write the final state back to 'state'
    1.123 +      __ stx(O1, I0, 8);
    1.124 +
    1.125 +      __ ret();
    1.126 +      __ delayed()->restore();    // pop the register window in the return delay slot
    1.127 +
    1.128 +      return start;
    1.129 +  }
   1.130 +
   1.131    void generate_initial() {
   1.132      // Generates all stubs and initializes the entry points
   1.133  
   1.134 @@ -4860,6 +4984,10 @@
   1.135        StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
   1.136        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
   1.137      }
   1.138 +    // generate GHASH intrinsics code
   1.139 +    if (UseGHASHIntrinsics) {
   1.140 +      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
   1.141 +    }
   1.142  
   1.143      // generate SHA1/SHA256/SHA512 intrinsics code
   1.144      if (UseSHA1Intrinsics) {

mercurial