8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
Reviewed-by: kvn, jrose, phh

author:    ascarpino
date:      Wed, 17 Jun 2015 17:48:25 -0700
changeset: 9788:44ef77ad417c
parent:    9787:9f28a4cac6d9
child:     9789:e55d4d896e30

src/cpu/ppc/vm/vm_version_ppc.cpp
src/cpu/sparc/vm/assembler_sparc.hpp
src/cpu/sparc/vm/stubGenerator_sparc.cpp
src/cpu/sparc/vm/vm_version_sparc.cpp
src/cpu/x86/vm/assembler_x86.cpp
src/cpu/x86/vm/assembler_x86.hpp
src/cpu/x86/vm/stubGenerator_x86_32.cpp
src/cpu/x86/vm/stubGenerator_x86_64.cpp
src/cpu/x86/vm/stubRoutines_x86.cpp
src/cpu/x86/vm/stubRoutines_x86.hpp
src/cpu/x86/vm/vm_version_x86.cpp
src/share/vm/classfile/vmSymbols.hpp
src/share/vm/opto/escape.cpp
src/share/vm/opto/library_call.cpp
src/share/vm/opto/runtime.cpp
src/share/vm/opto/runtime.hpp
src/share/vm/runtime/globals.hpp
src/share/vm/runtime/stubRoutines.cpp
src/share/vm/runtime/stubRoutines.hpp
src/share/vm/runtime/vmStructs.cpp
test/compiler/7184394/TestAESBase.java
test/compiler/7184394/TestAESEncode.java
test/compiler/7184394/TestAESMain.java
     1.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Jul 31 14:28:51 2019 -0400
     1.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Jun 17 17:48:25 2015 -0700
     1.3 @@ -194,6 +194,11 @@
     1.4      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
     1.5    }
     1.6  
     1.7 +  if (UseGHASHIntrinsics) {
     1.8 +    warning("GHASH intrinsics are not available on this CPU");
     1.9 +    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
    1.10 +  }
    1.11 +
    1.12    if (has_vshasig()) {
    1.13      if (FLAG_IS_DEFAULT(UseSHA)) {
    1.14        UseSHA = true;
     2.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jul 31 14:28:51 2019 -0400
     2.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jun 17 17:48:25 2015 -0700
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     2.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
     2.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.8   *
     2.9   * This code is free software; you can redistribute it and/or modify it
    2.10 @@ -129,6 +129,7 @@
    2.11      flog3_op3    = 0x36,
    2.12      edge_op3     = 0x36,
    2.13      fsrc_op3     = 0x36,
    2.14 +    xmulx_op3    = 0x36,
    2.15      impdep2_op3  = 0x37,
    2.16      stpartialf_op3 = 0x37,
    2.17      jmpl_op3     = 0x38,
    2.18 @@ -220,6 +221,8 @@
    2.19      mdtox_opf          = 0x110,
    2.20      mstouw_opf         = 0x111,
    2.21      mstosw_opf         = 0x113,
    2.22 +    xmulx_opf          = 0x115,
    2.23 +    xmulxhi_opf        = 0x116,
    2.24      mxtod_opf          = 0x118,
    2.25      mwtos_opf          = 0x119,
    2.26  
    2.27 @@ -1212,6 +1215,9 @@
    2.28    void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
    2.29    void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
    2.30  
    2.31 +  void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); }
    2.32 +  void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); }
    2.33 +
    2.34    // Crypto SHA instructions
    2.35  
    2.36    void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
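
The two VIS3 opcodes added above, xmulx and xmulxhi, supply the primitive the SPARC GHASH stub in the next file is built from: the low and high 64 bits of a carry-less (XOR) 64x64-bit multiplication. A rough bit-by-bit Java model of that behavior, inferred from how the stub uses the instructions rather than from the architecture manual:

    // Illustrative model of the VIS3 carry-less multiply pair; the class and
    // method names simply mirror the opcodes and are not real APIs.
    final class Xmul {
        // low 64 bits of the carry-less product of a and b
        static long xmulx(long a, long b) {
            long lo = 0;
            for (int i = 0; i < 64; i++) {
                if (((b >>> i) & 1) != 0) lo ^= a << i;   // bits pushed above bit 63 drop out here
            }
            return lo;
        }
        // high 64 bits of the same product (the bits that overflowed past bit 63)
        static long xmulxhi(long a, long b) {
            long hi = 0;
            for (int i = 1; i < 64; i++) {
                if (((b >>> i) & 1) != 0) hi ^= a >>> (64 - i);
            }
            return hi;
        }
    }
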
     3.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 31 14:28:51 2019 -0400
     3.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jun 17 17:48:25 2015 -0700
     3.3 @@ -4788,6 +4788,130 @@
     3.4      return start;
     3.5    }
     3.6  
     3.7 +  /* Single and multi-block ghash operations */
     3.8 +  address generate_ghash_processBlocks() {
     3.9 +      __ align(CodeEntryAlignment);
    3.10 +      Label L_ghash_loop, L_aligned, L_main;
    3.11 +      StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    3.12 +      address start = __ pc();
    3.13 +
    3.14 +      Register state = I0;
    3.15 +      Register subkeyH = I1;
    3.16 +      Register data = I2;
    3.17 +      Register len = I3;
    3.18 +
    3.19 +      __ save_frame(0);
    3.20 +
    3.21 +      __ ldx(state, 0, O0);
    3.22 +      __ ldx(state, 8, O1);
    3.23 +
    3.24 +      // Loop label for multiblock operations
    3.25 +      __ BIND(L_ghash_loop);
    3.26 +
    3.27 +      // Check if 'data' is unaligned
    3.28 +      __ andcc(data, 7, G1);
    3.29 +      __ br(Assembler::zero, false, Assembler::pt, L_aligned);
    3.30 +      __ delayed()->nop();
    3.31 +
    3.32 +      Register left_shift = L1;
    3.33 +      Register right_shift = L2;
    3.34 +      Register data_ptr = L3;
    3.35 +
    3.36 +      // Get left and right shift values in bits
    3.37 +      __ sll(G1, LogBitsPerByte, left_shift);
    3.38 +      __ mov(64, right_shift);
    3.39 +      __ sub(right_shift, left_shift, right_shift);
    3.40 +
    3.41 +      // Align to read 'data'
    3.42 +      __ sub(data, G1, data_ptr);
    3.43 +
    3.44 +      // Load first 8 bytes of 'data'
    3.45 +      __ ldx(data_ptr, 0, O4);
    3.46 +      __ sllx(O4, left_shift, O4);
    3.47 +      __ ldx(data_ptr, 8, O5);
    3.48 +      __ srlx(O5, right_shift, G4);
    3.49 +      __ bset(G4, O4);
    3.50 +
    3.51 +      // Load second 8 bytes of 'data'
    3.52 +      __ sllx(O5, left_shift, O5);
    3.53 +      __ ldx(data_ptr, 16, G4);
    3.54 +      __ srlx(G4, right_shift, G4);
    3.55 +      __ ba(L_main);
    3.56 +      __ delayed()->bset(G4, O5);
    3.57 +
    3.58 +      // If 'data' is aligned, load normally
    3.59 +      __ BIND(L_aligned);
    3.60 +      __ ldx(data, 0, O4);
    3.61 +      __ ldx(data, 8, O5);
    3.62 +
    3.63 +      __ BIND(L_main);
    3.64 +      __ ldx(subkeyH, 0, O2);
    3.65 +      __ ldx(subkeyH, 8, O3);
    3.66 +
    3.67 +      __ xor3(O0, O4, O0);
    3.68 +      __ xor3(O1, O5, O1);
    3.69 +
    3.70 +      __ xmulxhi(O0, O3, G3);
    3.71 +      __ xmulx(O0, O2, O5);
    3.72 +      __ xmulxhi(O1, O2, G4);
    3.73 +      __ xmulxhi(O1, O3, G5);
    3.74 +      __ xmulx(O0, O3, G1);
    3.75 +      __ xmulx(O1, O3, G2);
    3.76 +      __ xmulx(O1, O2, O3);
    3.77 +      __ xmulxhi(O0, O2, O4);
    3.78 +
    3.79 +      __ mov(0xE1, O0);
    3.80 +      __ sllx(O0, 56, O0);
    3.81 +
    3.82 +      __ xor3(O5, G3, O5);
    3.83 +      __ xor3(O5, G4, O5);
    3.84 +      __ xor3(G5, G1, G1);
    3.85 +      __ xor3(G1, O3, G1);
    3.86 +      __ srlx(G2, 63, O1);
    3.87 +      __ srlx(G1, 63, G3);
    3.88 +      __ sllx(G2, 63, O3);
    3.89 +      __ sllx(G2, 58, O2);
    3.90 +      __ xor3(O3, O2, O2);
    3.91 +
    3.92 +      __ sllx(G1, 1, G1);
    3.93 +      __ or3(G1, O1, G1);
    3.94 +
    3.95 +      __ xor3(G1, O2, G1);
    3.96 +
    3.97 +      __ sllx(G2, 1, G2);
    3.98 +
    3.99 +      __ xmulxhi(G1, O0, O1);
   3.100 +      __ xmulx(G1, O0, O2);
   3.101 +      __ xmulxhi(G2, O0, O3);
   3.102 +      __ xmulx(G2, O0, G1);
   3.103 +
   3.104 +      __ xor3(O4, O1, O4);
   3.105 +      __ xor3(O5, O2, O5);
   3.106 +      __ xor3(O5, O3, O5);
   3.107 +
   3.108 +      __ sllx(O4, 1, O2);
   3.109 +      __ srlx(O5, 63, O3);
   3.110 +
   3.111 +      __ or3(O2, O3, O0);
   3.112 +
   3.113 +      __ sllx(O5, 1, O1);
   3.114 +      __ srlx(G1, 63, O2);
   3.115 +      __ or3(O1, O2, O1);
   3.116 +      __ xor3(O1, G3, O1);
   3.117 +
   3.118 +      __ deccc(len);
   3.119 +      __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
   3.120 +      __ delayed()->add(data, 16, data);
   3.121 +
   3.122 +      __ stx(O0, I0, 0);
   3.123 +      __ stx(O1, I0, 8);
   3.124 +
   3.125 +      __ ret();
   3.126 +      __ delayed()->restore();
   3.127 +
   3.128 +      return start;
   3.129 +  }
   3.130 +
   3.131    void generate_initial() {
   3.132      // Generates all stubs and initializes the entry points
   3.133  
   3.134 @@ -4860,6 +4984,10 @@
   3.135        StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
   3.136        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
   3.137      }
   3.138 +    // generate GHASH intrinsics code
   3.139 +    if (UseGHASHIntrinsics) {
   3.140 +      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
   3.141 +    }
   3.142  
   3.143      // generate SHA1/SHA256/SHA512 intrinsics code
   3.144      if (UseSHA1Intrinsics) {
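
The stub above computes the GHASH state update state = (state ^ block) * H over GF(2^128) in GCM's bit-reflected convention; the 0xE1 constant shifted into the top byte is that convention's encoding of the reduction polynomial x^128 + x^7 + x^2 + x + 1. A plain bit-by-bit Java sketch of the same computation, written against the ([BII[J[J)V shape registered for the intrinsic further down (a reference model with illustrative names, not the SunJCE provider's code):

    final class GhashReference {
        // One GF(2^128) multiply: st = st * subH, bit-reflected as in GCM.
        private static void gfMult(long[] st, long[] subH) {
            long x0 = st[0], x1 = st[1];        // current state, big-endian 64-bit halves
            long h0 = subH[0], h1 = subH[1];    // hash subkey H
            long z0 = 0, z1 = 0;
            for (int i = 0; i < 128; i++) {
                long bit = (i < 64) ? (x0 >>> (63 - i)) & 1 : (x1 >>> (127 - i)) & 1;
                if (bit != 0) { z0 ^= h0; z1 ^= h1; }
                boolean carry = (h1 & 1) != 0;          // bit about to be shifted out
                h1 = (h1 >>> 1) | (h0 << 63);
                h0 >>>= 1;
                if (carry) h0 ^= 0xE100000000000000L;   // reduce by the GCM polynomial
            }
            st[0] = z0; st[1] = z1;
        }

        // Per-block loop the stub implements: fold in 16 data bytes, multiply by H.
        static void processBlocks(byte[] data, int inOfs, int blocks, long[] st, long[] subH) {
            for (int b = 0; b < blocks; b++) {
                long hi = 0, lo = 0;
                for (int i = 0; i < 8; i++)  hi = (hi << 8) | (data[inOfs + i] & 0xFF);
                for (int i = 8; i < 16; i++) lo = (lo << 8) | (data[inOfs + i] & 0xFF);
                st[0] ^= hi;
                st[1] ^= lo;
                gfMult(st, subH);
                inOfs += 16;
            }
        }
    }
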
     4.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Jul 31 14:28:51 2019 -0400
     4.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Jun 17 17:48:25 2015 -0700
     4.3 @@ -319,6 +319,17 @@
     4.4      }
     4.5    }
     4.6  
     4.7 +  // GHASH/GCM intrinsics
     4.8 +  if (has_vis3() && (UseVIS > 2)) {
     4.9 +    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
    4.10 +      UseGHASHIntrinsics = true;
    4.11 +    }
    4.12 +  } else if (UseGHASHIntrinsics) {
    4.13 +    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
    4.14 +      warning("GHASH intrinsics require VIS3 instruction support. Intrinsics will be disabled");
    4.15 +    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
    4.16 +  }
    4.17 +
    4.18    // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
    4.19    if (has_sha1() || has_sha256() || has_sha512()) {
    4.20      if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
     5.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 31 14:28:51 2019 -0400
     5.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Jun 17 17:48:25 2015 -0700
     5.3 @@ -2575,6 +2575,15 @@
     5.4    emit_int8(shift);
     5.5  }
     5.6  
     5.7 +void Assembler::pslldq(XMMRegister dst, int shift) {
     5.8 +  // Shift left 128 bit value in xmm register by number of bytes.
     5.9 +  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    5.10 +  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);
    5.11 +  emit_int8(0x73);
    5.12 +  emit_int8((unsigned char)(0xC0 | encode));
    5.13 +  emit_int8(shift);
    5.14 +}
    5.15 +
    5.16  void Assembler::ptest(XMMRegister dst, Address src) {
    5.17    assert(VM_Version::supports_sse4_1(), "");
    5.18    assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
     6.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 31 14:28:51 2019 -0400
     6.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Jun 17 17:48:25 2015 -0700
     6.3 @@ -1527,6 +1527,8 @@
     6.4  
     6.5    // Shift Right by bytes Logical DoubleQuadword Immediate
     6.6    void psrldq(XMMRegister dst, int shift);
     6.7 +  // Shift Left by bytes Logical DoubleQuadword Immediate
     6.8 +  void pslldq(XMMRegister dst, int shift);
     6.9  
    6.10    // Logical Compare 128bit
    6.11    void ptest(XMMRegister dst, XMMRegister src);
     7.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 31 14:28:51 2019 -0400
     7.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jun 17 17:48:25 2015 -0700
     7.3 @@ -2719,6 +2719,167 @@
     7.4      return start;
     7.5    }
     7.6  
     7.7 +  // byte swap x86 long
     7.8 +  address generate_ghash_long_swap_mask() {
     7.9 +    __ align(CodeEntryAlignment);
    7.10 +    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    7.11 +    address start = __ pc();
    7.12 +    __ emit_data(0x0b0a0908, relocInfo::none, 0);
    7.13 +    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
    7.14 +    __ emit_data(0x03020100, relocInfo::none, 0);
    7.15 +    __ emit_data(0x07060504, relocInfo::none, 0);
    7.16 +
    7.17 +    return start;
    7.18 +  }
    7.19 +
    7.20 +  // byte swap x86 byte array
    7.21 +  address generate_ghash_byte_swap_mask() {
    7.22 +    __ align(CodeEntryAlignment);
    7.23 +    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    7.24 +    address start = __ pc();
    7.25 +    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
    7.26 +    __ emit_data(0x08090a0b, relocInfo::none, 0);
    7.27 +    __ emit_data(0x04050607, relocInfo::none, 0);
    7.28 +    __ emit_data(0x00010203, relocInfo::none, 0);
    7.29 +    return start;
    7.30 +  }
    7.31 +
    7.32 +  /* Single and multi-block ghash operations */
    7.33 +  address generate_ghash_processBlocks() {
    7.34 +    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
    7.35 +    __ align(CodeEntryAlignment);
    7.36 +    Label L_ghash_loop, L_exit;
    7.37 +    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    7.38 +    address start = __ pc();
    7.39 +
    7.40 +    const Register state        = rdi;
    7.41 +    const Register subkeyH      = rsi;
    7.42 +    const Register data         = rdx;
    7.43 +    const Register blocks       = rcx;
    7.44 +
    7.45 +    const Address  state_param(rbp, 8+0);
    7.46 +    const Address  subkeyH_param(rbp, 8+4);
    7.47 +    const Address  data_param(rbp, 8+8);
    7.48 +    const Address  blocks_param(rbp, 8+12);
    7.49 +
    7.50 +    const XMMRegister xmm_temp0 = xmm0;
    7.51 +    const XMMRegister xmm_temp1 = xmm1;
    7.52 +    const XMMRegister xmm_temp2 = xmm2;
    7.53 +    const XMMRegister xmm_temp3 = xmm3;
    7.54 +    const XMMRegister xmm_temp4 = xmm4;
    7.55 +    const XMMRegister xmm_temp5 = xmm5;
    7.56 +    const XMMRegister xmm_temp6 = xmm6;
    7.57 +    const XMMRegister xmm_temp7 = xmm7;
    7.58 +
    7.59 +    __ enter();
    7.60 +
    7.61 +    __ movptr(state, state_param);
    7.62 +    __ movptr(subkeyH, subkeyH_param);
    7.63 +    __ movptr(data, data_param);
    7.64 +    __ movptr(blocks, blocks_param);
    7.65 +
    7.66 +    __ movdqu(xmm_temp0, Address(state, 0));
    7.67 +    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    7.68 +
    7.69 +    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    7.70 +    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    7.71 +
    7.72 +    __ BIND(L_ghash_loop);
    7.73 +    __ movdqu(xmm_temp2, Address(data, 0));
    7.74 +    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
    7.75 +
    7.76 +    __ pxor(xmm_temp0, xmm_temp2);
    7.77 +
    7.78 +    //
    7.79 +    // Multiply with the hash key
    7.80 +    //
    7.81 +    __ movdqu(xmm_temp3, xmm_temp0);
    7.82 +    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    7.83 +    __ movdqu(xmm_temp4, xmm_temp0);
    7.84 +    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
    7.85 +
    7.86 +    __ movdqu(xmm_temp5, xmm_temp0);
    7.87 +    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    7.88 +    __ movdqu(xmm_temp6, xmm_temp0);
    7.89 +    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
    7.90 +
    7.91 +    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
    7.92 +
    7.93 +    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
     7.94 +    __ psrldq(xmm_temp4, 8);    // shift xmm4 64 bits to the right
     7.95 +    __ pslldq(xmm_temp5, 8);    // shift xmm5 64 bits to the left
    7.96 +    __ pxor(xmm_temp3, xmm_temp5);
    7.97 +    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
    7.98 +                                        // of the carry-less multiplication of
    7.99 +                                        // xmm0 by xmm1.
   7.100 +
   7.101 +    // We shift the result of the multiplication by one bit position
   7.102 +    // to the left to cope with the fact that the bits are reversed.
   7.103 +    __ movdqu(xmm_temp7, xmm_temp3);
   7.104 +    __ movdqu(xmm_temp4, xmm_temp6);
   7.105 +    __ pslld (xmm_temp3, 1);
   7.106 +    __ pslld(xmm_temp6, 1);
   7.107 +    __ psrld(xmm_temp7, 31);
   7.108 +    __ psrld(xmm_temp4, 31);
   7.109 +    __ movdqu(xmm_temp5, xmm_temp7);
   7.110 +    __ pslldq(xmm_temp4, 4);
   7.111 +    __ pslldq(xmm_temp7, 4);
   7.112 +    __ psrldq(xmm_temp5, 12);
   7.113 +    __ por(xmm_temp3, xmm_temp7);
   7.114 +    __ por(xmm_temp6, xmm_temp4);
   7.115 +    __ por(xmm_temp6, xmm_temp5);
   7.116 +
   7.117 +    //
   7.118 +    // First phase of the reduction
   7.119 +    //
   7.120 +    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
   7.121 +    // independently.
   7.122 +    __ movdqu(xmm_temp7, xmm_temp3);
   7.123 +    __ movdqu(xmm_temp4, xmm_temp3);
   7.124 +    __ movdqu(xmm_temp5, xmm_temp3);
   7.125 +    __ pslld(xmm_temp7, 31);    // packed left shift, << 31
   7.126 +    __ pslld(xmm_temp4, 30);    // packed left shift, << 30
   7.127 +    __ pslld(xmm_temp5, 25);    // packed left shift, << 25
   7.128 +    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
   7.129 +    __ pxor(xmm_temp7, xmm_temp5);
   7.130 +    __ movdqu(xmm_temp4, xmm_temp7);
   7.131 +    __ pslldq(xmm_temp7, 12);
   7.132 +    __ psrldq(xmm_temp4, 4);
   7.133 +    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
   7.134 +
   7.135 +    //
   7.136 +    // Second phase of the reduction
   7.137 +    //
   7.138 +    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
   7.139 +    // shift operations.
   7.140 +    __ movdqu(xmm_temp2, xmm_temp3);
   7.141 +    __ movdqu(xmm_temp7, xmm_temp3);
   7.142 +    __ movdqu(xmm_temp5, xmm_temp3);
   7.143 +    __ psrld(xmm_temp2, 1);     // packed right shift, >> 1
   7.144 +    __ psrld(xmm_temp7, 2);     // packed right shift, >> 2
   7.145 +    __ psrld(xmm_temp5, 7);     // packed right shift, >> 7
   7.146 +    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
   7.147 +    __ pxor(xmm_temp2, xmm_temp5);
   7.148 +    __ pxor(xmm_temp2, xmm_temp4);
   7.149 +    __ pxor(xmm_temp3, xmm_temp2);
   7.150 +    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
   7.151 +
   7.152 +    __ decrement(blocks);
   7.153 +    __ jcc(Assembler::zero, L_exit);
   7.154 +    __ movdqu(xmm_temp0, xmm_temp6);
   7.155 +    __ addptr(data, 16);
   7.156 +    __ jmp(L_ghash_loop);
   7.157 +
   7.158 +    __ BIND(L_exit);
   7.159 +    // Byte swap 16-byte result
   7.160 +    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
   7.161 +    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
   7.162 +
   7.163 +    __ leave();
   7.164 +    __ ret(0);
   7.165 +    return start;
   7.166 +  }
   7.167 +
   7.168    /**
   7.169     *  Arguments:
   7.170     *
   7.171 @@ -3018,6 +3179,13 @@
   7.172        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
   7.173      }
   7.174  
   7.175 +    // Generate GHASH intrinsics code
   7.176 +    if (UseGHASHIntrinsics) {
   7.177 +      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
   7.178 +      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
   7.179 +      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
   7.180 +    }
   7.181 +
   7.182      // Safefetch stubs.
   7.183      generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
   7.184                                                     &StubRoutines::_safefetch32_fault_pc,
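
The 64-bit generator in the next file repeats the same multiply: four pclmulqdq calls produce a0*b0, a0*b1, a1*b0 and a1*b1, and the psrldq/pslldq-by-8 pair splits the combined middle term across the two halves of the 256-bit product held in <xmm6:xmm3>. A small Java sketch of just that folding step, with each 128-bit partial product modeled as a {hi, lo} pair of longs (the array convention is only for illustration):

    final class ClmulCombine {
        // Returns the 256-bit carry-less product as
        // { bits 255..192, 191..128, 127..64, 63..0 }.
        static long[] combine(long[] a0b0, long[] a0b1, long[] a1b0, long[] a1b1) {
            // pxor xmm4, xmm5: middle term a0*b1 ^ a1*b0
            long midHi = a0b1[0] ^ a1b0[0];
            long midLo = a0b1[1] ^ a1b0[1];
            // pslldq/psrldq by 8: the low 128 bits gain midLo << 64,
            // the high 128 bits gain midHi
            long lowLo  = a0b0[1];
            long lowHi  = a0b0[0] ^ midLo;
            long highLo = a1b1[1] ^ midHi;
            long highHi = a1b1[0];
            return new long[] { highHi, highLo, lowHi, lowLo };
        }
    }
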
     8.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 31 14:28:51 2019 -0400
     8.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jun 17 17:48:25 2015 -0700
     8.3 @@ -3639,6 +3639,175 @@
     8.4      return start;
     8.5    }
     8.6  
     8.7 +
     8.8 +  // byte swap x86 long
     8.9 +  address generate_ghash_long_swap_mask() {
    8.10 +    __ align(CodeEntryAlignment);
    8.11 +    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    8.12 +    address start = __ pc();
    8.13 +    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
    8.14 +    __ emit_data64(0x0706050403020100, relocInfo::none );
    8.15 +    return start;
    8.16 +  }
    8.17 +
    8.18 +  // byte swap x86 byte array
    8.19 +  address generate_ghash_byte_swap_mask() {
    8.20 +    __ align(CodeEntryAlignment);
    8.21 +    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    8.22 +    address start = __ pc();
    8.23 +    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
    8.24 +    __ emit_data64(0x0001020304050607, relocInfo::none );
    8.25 +    return start;
    8.26 +  }
    8.27 +
    8.28 +  /* Single and multi-block ghash operations */
    8.29 +  address generate_ghash_processBlocks() {
    8.30 +    __ align(CodeEntryAlignment);
    8.31 +    Label L_ghash_loop, L_exit;
    8.32 +    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    8.33 +    address start = __ pc();
    8.34 +
    8.35 +    const Register state        = c_rarg0;
    8.36 +    const Register subkeyH      = c_rarg1;
    8.37 +    const Register data         = c_rarg2;
    8.38 +    const Register blocks       = c_rarg3;
    8.39 +
    8.40 +#ifdef _WIN64
    8.41 +    const int XMM_REG_LAST  = 10;
    8.42 +#endif
    8.43 +
    8.44 +    const XMMRegister xmm_temp0 = xmm0;
    8.45 +    const XMMRegister xmm_temp1 = xmm1;
    8.46 +    const XMMRegister xmm_temp2 = xmm2;
    8.47 +    const XMMRegister xmm_temp3 = xmm3;
    8.48 +    const XMMRegister xmm_temp4 = xmm4;
    8.49 +    const XMMRegister xmm_temp5 = xmm5;
    8.50 +    const XMMRegister xmm_temp6 = xmm6;
    8.51 +    const XMMRegister xmm_temp7 = xmm7;
    8.52 +    const XMMRegister xmm_temp8 = xmm8;
    8.53 +    const XMMRegister xmm_temp9 = xmm9;
    8.54 +    const XMMRegister xmm_temp10 = xmm10;
    8.55 +
    8.56 +    __ enter();
    8.57 +
    8.58 +#ifdef _WIN64
    8.59 +    // save the xmm registers which must be preserved 6-10
    8.60 +    __ subptr(rsp, -rsp_after_call_off * wordSize);
    8.61 +    for (int i = 6; i <= XMM_REG_LAST; i++) {
    8.62 +      __ movdqu(xmm_save(i), as_XMMRegister(i));
    8.63 +    }
    8.64 +#endif
    8.65 +
    8.66 +    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    8.67 +
    8.68 +    __ movdqu(xmm_temp0, Address(state, 0));
    8.69 +    __ pshufb(xmm_temp0, xmm_temp10);
    8.70 +
    8.71 +
    8.72 +    __ BIND(L_ghash_loop);
    8.73 +    __ movdqu(xmm_temp2, Address(data, 0));
    8.74 +    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
    8.75 +
    8.76 +    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    8.77 +    __ pshufb(xmm_temp1, xmm_temp10);
    8.78 +
    8.79 +    __ pxor(xmm_temp0, xmm_temp2);
    8.80 +
    8.81 +    //
    8.82 +    // Multiply with the hash key
    8.83 +    //
    8.84 +    __ movdqu(xmm_temp3, xmm_temp0);
    8.85 +    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    8.86 +    __ movdqu(xmm_temp4, xmm_temp0);
    8.87 +    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
    8.88 +
    8.89 +    __ movdqu(xmm_temp5, xmm_temp0);
    8.90 +    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    8.91 +    __ movdqu(xmm_temp6, xmm_temp0);
    8.92 +    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
    8.93 +
    8.94 +    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
    8.95 +
    8.96 +    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
    8.97 +    __ psrldq(xmm_temp4, 8);    // shift xmm4 64 bits to the right
    8.98 +    __ pslldq(xmm_temp5, 8);    // shift xmm5 64 bits to the left
    8.99 +    __ pxor(xmm_temp3, xmm_temp5);
   8.100 +    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
   8.101 +                                        // of the carry-less multiplication of
   8.102 +                                        // xmm0 by xmm1.
   8.103 +
   8.104 +    // We shift the result of the multiplication by one bit position
   8.105 +    // to the left to cope with the fact that the bits are reversed.
   8.106 +    __ movdqu(xmm_temp7, xmm_temp3);
   8.107 +    __ movdqu(xmm_temp8, xmm_temp6);
   8.108 +    __ pslld(xmm_temp3, 1);
   8.109 +    __ pslld(xmm_temp6, 1);
   8.110 +    __ psrld(xmm_temp7, 31);
   8.111 +    __ psrld(xmm_temp8, 31);
   8.112 +    __ movdqu(xmm_temp9, xmm_temp7);
   8.113 +    __ pslldq(xmm_temp8, 4);
   8.114 +    __ pslldq(xmm_temp7, 4);
   8.115 +    __ psrldq(xmm_temp9, 12);
   8.116 +    __ por(xmm_temp3, xmm_temp7);
   8.117 +    __ por(xmm_temp6, xmm_temp8);
   8.118 +    __ por(xmm_temp6, xmm_temp9);
   8.119 +
   8.120 +    //
   8.121 +    // First phase of the reduction
   8.122 +    //
   8.123 +    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
   8.124 +    // independently.
   8.125 +    __ movdqu(xmm_temp7, xmm_temp3);
   8.126 +    __ movdqu(xmm_temp8, xmm_temp3);
   8.127 +    __ movdqu(xmm_temp9, xmm_temp3);
   8.128 +    __ pslld(xmm_temp7, 31);    // packed left shift, << 31
   8.129 +    __ pslld(xmm_temp8, 30);    // packed left shift, << 30
   8.130 +    __ pslld(xmm_temp9, 25);    // packed left shift, << 25
   8.131 +    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
   8.132 +    __ pxor(xmm_temp7, xmm_temp9);
   8.133 +    __ movdqu(xmm_temp8, xmm_temp7);
   8.134 +    __ pslldq(xmm_temp7, 12);
   8.135 +    __ psrldq(xmm_temp8, 4);
   8.136 +    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
   8.137 +
   8.138 +    //
   8.139 +    // Second phase of the reduction
   8.140 +    //
   8.141 +    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
   8.142 +    // shift operations.
   8.143 +    __ movdqu(xmm_temp2, xmm_temp3);
   8.144 +    __ movdqu(xmm_temp4, xmm_temp3);
   8.145 +    __ movdqu(xmm_temp5, xmm_temp3);
   8.146 +    __ psrld(xmm_temp2, 1);     // packed right shift, >> 1
   8.147 +    __ psrld(xmm_temp4, 2);     // packed right shift, >> 2
   8.148 +    __ psrld(xmm_temp5, 7);     // packed right shift, >> 7
   8.149 +    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
   8.150 +    __ pxor(xmm_temp2, xmm_temp5);
   8.151 +    __ pxor(xmm_temp2, xmm_temp8);
   8.152 +    __ pxor(xmm_temp3, xmm_temp2);
   8.153 +    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
   8.154 +
   8.155 +    __ decrement(blocks);
   8.156 +    __ jcc(Assembler::zero, L_exit);
   8.157 +    __ movdqu(xmm_temp0, xmm_temp6);
   8.158 +    __ addptr(data, 16);
   8.159 +    __ jmp(L_ghash_loop);
   8.160 +
   8.161 +    __ BIND(L_exit);
   8.162 +    __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
   8.163 +    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
   8.164 +
   8.165 +#ifdef _WIN64
   8.166 +    // restore xmm regs belonging to calling function
   8.167 +    for (int i = 6; i <= XMM_REG_LAST; i++) {
   8.168 +      __ movdqu(as_XMMRegister(i), xmm_save(i));
   8.169 +    }
   8.170 +#endif
   8.171 +    __ leave();
   8.172 +    __ ret(0);
   8.173 +    return start;
   8.174 +  }
   8.175 +
   8.176    /**
   8.177     *  Arguments:
   8.178     *
   8.179 @@ -4077,6 +4246,13 @@
   8.180        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
   8.181      }
   8.182  
   8.183 +    // Generate GHASH intrinsics code
   8.184 +    if (UseGHASHIntrinsics) {
   8.185 +      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
   8.186 +      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
   8.187 +      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
   8.188 +    }
   8.189 +
   8.190      // Safefetch stubs.
   8.191      generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
   8.192                                                         &StubRoutines::_safefetch32_fault_pc,
     9.1 --- a/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Jul 31 14:28:51 2019 -0400
     9.2 +++ b/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Jun 17 17:48:25 2015 -0700
     9.3 @@ -1,5 +1,5 @@
     9.4  /*
     9.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
     9.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
     9.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     9.8   *
     9.9   * This code is free software; you can redistribute it and/or modify it
    9.10 @@ -33,6 +33,8 @@
    9.11  
    9.12  address StubRoutines::x86::_verify_mxcsr_entry = NULL;
    9.13  address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
    9.14 +address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
    9.15 +address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
    9.16  
    9.17  uint64_t StubRoutines::x86::_crc_by128_masks[] =
    9.18  {
    10.1 --- a/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Jul 31 14:28:51 2019 -0400
    10.2 +++ b/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Jun 17 17:48:25 2015 -0700
    10.3 @@ -1,5 +1,5 @@
    10.4  /*
    10.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
    10.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
    10.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    10.8   *
    10.9   * This code is free software; you can redistribute it and/or modify it
   10.10 @@ -36,10 +36,15 @@
   10.11    // masks and table for CRC32
   10.12    static uint64_t _crc_by128_masks[];
   10.13    static juint    _crc_table[];
   10.14 +  // swap mask for ghash
   10.15 +  static address _ghash_long_swap_mask_addr;
   10.16 +  static address _ghash_byte_swap_mask_addr;
   10.17  
   10.18   public:
   10.19    static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   10.20    static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
   10.21    static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
   10.22 +  static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
   10.23 +  static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
   10.24  
   10.25  #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
    11.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jul 31 14:28:51 2019 -0400
    11.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jun 17 17:48:25 2015 -0700
    11.3 @@ -594,6 +594,17 @@
    11.4      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    11.5    }
    11.6  
    11.7 +  // GHASH/GCM intrinsics
    11.8 +  if (UseCLMUL && (UseSSE > 2)) {
    11.9 +    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
   11.10 +      UseGHASHIntrinsics = true;
   11.11 +    }
   11.12 +  } else if (UseGHASHIntrinsics) {
   11.13 +    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
   11.14 +      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
   11.15 +    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   11.16 +  }
   11.17 +
   11.18    if (UseSHA) {
   11.19      warning("SHA instructions are not available on this CPU");
   11.20      FLAG_SET_DEFAULT(UseSHA, false);
    12.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Wed Jul 31 14:28:51 2019 -0400
    12.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Jun 17 17:48:25 2015 -0700
    12.3 @@ -863,6 +863,12 @@
    12.4     do_name(     implCompressMB_name,                               "implCompressMultiBlock0")                           \
    12.5     do_signature(implCompressMB_signature,                          "([BII)I")                                           \
    12.6                                                                                                                          \
    12.7 +  /* support for com.sun.crypto.provider.GHASH */                                                                       \
    12.8 +  do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH")                                              \
    12.9 +  do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \
   12.10 +   do_name(processBlocks_name, "processBlocks")                                                                         \
   12.11 +   do_signature(ghash_processBlocks_signature, "([BII[J[J)V")                                                           \
   12.12 +                                                                                                                        \
   12.13    /* support for java.util.zip */                                                                                       \
   12.14    do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
   12.15    do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
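
For reference, the registered descriptor "([BII[J[J)V" reads as a static void method taking (byte[], int, int, long[], long[]); the parameter roles below follow how library_call.cpp picks up the arguments, while the names themselves are only illustrative (the real method is a private member of com.sun.crypto.provider.GHASH):

    final class GhashEntryShape {
        static void processBlocks(byte[] data,     // [B  input bytes
                                  int inOfs,       // I   starting offset into data
                                  int blocks,      // I   number of 16-byte blocks
                                  long[] st,       // [J  128-bit state as two longs
                                  long[] subH) {   // [J  128-bit hash subkey H
            // body elided; when the intrinsic is enabled, C2 replaces calls to
            // this method with a call to the ghash_processBlocks stub
        }
    }
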
    13.1 --- a/src/share/vm/opto/escape.cpp	Wed Jul 31 14:28:51 2019 -0400
    13.2 +++ b/src/share/vm/opto/escape.cpp	Wed Jun 17 17:48:25 2015 -0700
    13.3 @@ -952,6 +952,7 @@
    13.4                    strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
    13.5                    strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
    13.6                    strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
    13.7 +                  strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
    13.8                    strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
    13.9                    strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
   13.10                    strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
    14.1 --- a/src/share/vm/opto/library_call.cpp	Wed Jul 31 14:28:51 2019 -0400
    14.2 +++ b/src/share/vm/opto/library_call.cpp	Wed Jun 17 17:48:25 2015 -0700
    14.3 @@ -311,6 +311,7 @@
    14.4    Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
    14.5    Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
    14.6    Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
    14.7 +  bool inline_ghash_processBlocks();
    14.8    bool inline_sha_implCompress(vmIntrinsics::ID id);
    14.9    bool inline_digestBase_implCompressMB(int predicate);
   14.10    bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
   14.11 @@ -570,6 +571,10 @@
   14.12      predicates = 3;
   14.13      break;
   14.14  
   14.15 +  case vmIntrinsics::_ghash_processBlocks:
   14.16 +    if (!UseGHASHIntrinsics) return NULL;
   14.17 +    break;
   14.18 +
   14.19    case vmIntrinsics::_updateCRC32:
   14.20    case vmIntrinsics::_updateBytesCRC32:
   14.21    case vmIntrinsics::_updateByteBufferCRC32:
   14.22 @@ -957,6 +962,9 @@
   14.23    case vmIntrinsics::_montgomerySquare:
   14.24      return inline_montgomerySquare();
   14.25  
   14.26 +  case vmIntrinsics::_ghash_processBlocks:
   14.27 +    return inline_ghash_processBlocks();
   14.28 +
   14.29    case vmIntrinsics::_encodeISOArray:
   14.30      return inline_encodeISOArray();
   14.31  
   14.32 @@ -6599,6 +6607,35 @@
   14.33    return _gvn.transform(region);
   14.34  }
   14.35  
   14.36 +//------------------------------inline_ghash_processBlocks
   14.37 +bool LibraryCallKit::inline_ghash_processBlocks() {
   14.38 +  address stubAddr;
   14.39 +  const char *stubName;
   14.40 +  assert(UseGHASHIntrinsics, "need GHASH intrinsics support");
   14.41 +
   14.42 +  stubAddr = StubRoutines::ghash_processBlocks();
   14.43 +  stubName = "ghash_processBlocks";
   14.44 +
   14.45 +  Node* data           = argument(0);
   14.46 +  Node* offset         = argument(1);
   14.47 +  Node* len            = argument(2);
   14.48 +  Node* state          = argument(3);
   14.49 +  Node* subkeyH        = argument(4);
   14.50 +
   14.51 +  Node* state_start  = array_element_address(state, intcon(0), T_LONG);
   14.52 +  assert(state_start, "state is NULL");
   14.53 +  Node* subkeyH_start  = array_element_address(subkeyH, intcon(0), T_LONG);
   14.54 +  assert(subkeyH_start, "subkeyH is NULL");
   14.55 +  Node* data_start  = array_element_address(data, offset, T_BYTE);
   14.56 +  assert(data_start, "data is NULL");
   14.57 +
   14.58 +  Node* ghash = make_runtime_call(RC_LEAF|RC_NO_FP,
   14.59 +                                  OptoRuntime::ghash_processBlocks_Type(),
   14.60 +                                  stubAddr, stubName, TypePtr::BOTTOM,
   14.61 +                                  state_start, subkeyH_start, data_start, len);
   14.62 +  return true;
   14.63 +}
   14.64 +
   14.65  //------------------------------inline_sha_implCompress-----------------------
   14.66  //
   14.67  // Calculate SHA (i.e., SHA-1) for single-block byte[] array.
    15.1 --- a/src/share/vm/opto/runtime.cpp	Wed Jul 31 14:28:51 2019 -0400
    15.2 +++ b/src/share/vm/opto/runtime.cpp	Wed Jun 17 17:48:25 2015 -0700
    15.3 @@ -92,7 +92,25 @@
    15.4  // At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
    15.5  
    15.6  
    15.7 +// GHASH block processing
    15.8 +const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
    15.9 +    int argcnt = 4;
   15.10  
   15.11 +    const Type** fields = TypeTuple::fields(argcnt);
   15.12 +    int argp = TypeFunc::Parms;
   15.13 +    fields[argp++] = TypePtr::NOTNULL;    // state
   15.14 +    fields[argp++] = TypePtr::NOTNULL;    // subkeyH
   15.15 +    fields[argp++] = TypePtr::NOTNULL;    // data
   15.16 +    fields[argp++] = TypeInt::INT;        // blocks
   15.17 +    assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   15.18 +    const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
   15.19 +
   15.20 +    // result type needed
   15.21 +    fields = TypeTuple::fields(1);
   15.22 +    fields[TypeFunc::Parms+0] = NULL; // void
   15.23 +    const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
   15.24 +    return TypeFunc::make(domain, range);
   15.25 +}
   15.26  
   15.27  // Compiled code entry points
   15.28  address OptoRuntime::_new_instance_Java                           = NULL;
    16.1 --- a/src/share/vm/opto/runtime.hpp	Wed Jul 31 14:28:51 2019 -0400
    16.2 +++ b/src/share/vm/opto/runtime.hpp	Wed Jun 17 17:48:25 2015 -0700
    16.3 @@ -311,6 +311,8 @@
    16.4    static const TypeFunc* montgomeryMultiply_Type();
    16.5    static const TypeFunc* montgomerySquare_Type();
    16.6  
    16.7 +  static const TypeFunc* ghash_processBlocks_Type();
    16.8 +
    16.9    static const TypeFunc* updateBytesCRC32_Type();
   16.10  
   16.11    // leaf on stack replacement interpreter accessor types
    17.1 --- a/src/share/vm/runtime/globals.hpp	Wed Jul 31 14:28:51 2019 -0400
    17.2 +++ b/src/share/vm/runtime/globals.hpp	Wed Jun 17 17:48:25 2015 -0700
    17.3 @@ -602,6 +602,9 @@
    17.4    product(bool, UseSHA, false,                                              \
    17.5            "Control whether SHA instructions can be used on SPARC")          \
    17.6                                                                              \
    17.7 +  product(bool, UseGHASHIntrinsics, false,                                  \
    17.8 +          "Use intrinsics for GHASH versions of crypto")                    \
    17.9 +                                                                            \
   17.10    product(uintx, LargePageSizeInBytes, 0,                                   \
   17.11            "Large page size (0 to let VM choose the page size)")             \
   17.12                                                                              \
    18.1 --- a/src/share/vm/runtime/stubRoutines.cpp	Wed Jul 31 14:28:51 2019 -0400
    18.2 +++ b/src/share/vm/runtime/stubRoutines.cpp	Wed Jun 17 17:48:25 2015 -0700
    18.3 @@ -124,6 +124,7 @@
    18.4  address StubRoutines::_aescrypt_decryptBlock               = NULL;
    18.5  address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
    18.6  address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
    18.7 +address StubRoutines::_ghash_processBlocks                 = NULL;
    18.8  
    18.9  address StubRoutines::_sha1_implCompress     = NULL;
   18.10  address StubRoutines::_sha1_implCompressMB   = NULL;
    19.1 --- a/src/share/vm/runtime/stubRoutines.hpp	Wed Jul 31 14:28:51 2019 -0400
    19.2 +++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Jun 17 17:48:25 2015 -0700
    19.3 @@ -197,6 +197,7 @@
    19.4    static address _aescrypt_decryptBlock;
    19.5    static address _cipherBlockChaining_encryptAESCrypt;
    19.6    static address _cipherBlockChaining_decryptAESCrypt;
    19.7 +  static address _ghash_processBlocks;
    19.8  
    19.9    static address _sha1_implCompress;
   19.10    static address _sha1_implCompressMB;
   19.11 @@ -359,6 +360,7 @@
   19.12    static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
   19.13    static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   19.14    static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
   19.15 +  static address ghash_processBlocks() { return _ghash_processBlocks; }
   19.16  
   19.17    static address sha1_implCompress()     { return _sha1_implCompress; }
   19.18    static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
    20.1 --- a/src/share/vm/runtime/vmStructs.cpp	Wed Jul 31 14:28:51 2019 -0400
    20.2 +++ b/src/share/vm/runtime/vmStructs.cpp	Wed Jun 17 17:48:25 2015 -0700
    20.3 @@ -810,6 +810,7 @@
    20.4       static_field(StubRoutines,                _aescrypt_decryptBlock,                        address)                               \
    20.5       static_field(StubRoutines,                _cipherBlockChaining_encryptAESCrypt,          address)                               \
    20.6       static_field(StubRoutines,                _cipherBlockChaining_decryptAESCrypt,          address)                               \
    20.7 +     static_field(StubRoutines,                _ghash_processBlocks,                          address)                               \
    20.8       static_field(StubRoutines,                _updateBytesCRC32,                             address)                               \
    20.9       static_field(StubRoutines,                _crc_table_adr,                                address)                               \
   20.10       static_field(StubRoutines,                _multiplyToLen,                                address)                               \
    21.1 --- a/test/compiler/7184394/TestAESBase.java	Wed Jul 31 14:28:51 2019 -0400
    21.2 +++ b/test/compiler/7184394/TestAESBase.java	Wed Jun 17 17:48:25 2015 -0700
    21.3 @@ -29,6 +29,7 @@
    21.4  import javax.crypto.Cipher;
    21.5  import javax.crypto.KeyGenerator;
    21.6  import javax.crypto.SecretKey;
    21.7 +import javax.crypto.spec.GCMParameterSpec;
    21.8  import javax.crypto.spec.IvParameterSpec;
    21.9  import javax.crypto.spec.SecretKeySpec;
   21.10  import java.security.AlgorithmParameters;
   21.11 @@ -64,6 +65,10 @@
   21.12    Cipher dCipher;
   21.13    AlgorithmParameters algParams;
   21.14    SecretKey key;
   21.15 +  GCMParameterSpec gcm_spec;
   21.16 +  byte[] aad;
   21.17 +  int tlen = 12;
   21.18 +  byte[] iv;
   21.19  
   21.20    static int numThreads = 0;
   21.21    int  threadId;
   21.22 @@ -102,6 +107,12 @@
   21.23          int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
   21.24          IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
   21.25          cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
   21.26 +      } else if (mode.equals("GCM")) {
   21.27 +          iv = new byte[64];
   21.28 +          random.nextBytes(iv);
   21.29 +          aad = new byte[5];
   21.30 +          random.nextBytes(aad);
   21.31 +          gcm_init();
   21.32        } else {
   21.33          algParams = cipher.getParameters();
   21.34          cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
   21.35 @@ -188,4 +199,12 @@
   21.36    }
   21.37  
   21.38    abstract void childShowCipher();
   21.39 +
   21.40 +  void gcm_init() throws Exception {
   21.41 +    tlen = 12;
   21.42 +    gcm_spec = new GCMParameterSpec(tlen * 8, iv);
   21.43 +    cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
   21.44 +    cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec);
   21.45 +    cipher.update(aad);
   21.46 +  }
   21.47  }
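
The new GCM branch above drives the intrinsic through the ordinary JCE API. A minimal, standalone round trip using the same GCMParameterSpec setup as gcm_init() (key size, IV length, AAD and plaintext are arbitrary illustrative values, and the AAD is fed through updateAAD()):

    import javax.crypto.Cipher;
    import javax.crypto.KeyGenerator;
    import javax.crypto.SecretKey;
    import javax.crypto.spec.GCMParameterSpec;
    import java.nio.charset.StandardCharsets;
    import java.security.SecureRandom;
    import java.util.Arrays;

    public class GcmRoundTrip {
        public static void main(String[] args) throws Exception {
            SecureRandom random = new SecureRandom();
            KeyGenerator kg = KeyGenerator.getInstance("AES");
            kg.init(128);
            SecretKey key = kg.generateKey();

            byte[] iv = new byte[16];
            random.nextBytes(iv);
            byte[] aad = new byte[5];
            random.nextBytes(aad);
            byte[] plain = "GHASH intrinsic smoke test".getBytes(StandardCharsets.UTF_8);

            GCMParameterSpec spec = new GCMParameterSpec(12 * 8, iv);   // 96-bit tag, like tlen = 12

            Cipher enc = Cipher.getInstance("AES/GCM/NoPadding", "SunJCE");
            enc.init(Cipher.ENCRYPT_MODE, key, spec);
            enc.updateAAD(aad);
            byte[] ct = enc.doFinal(plain);

            Cipher dec = Cipher.getInstance("AES/GCM/NoPadding", "SunJCE");
            dec.init(Cipher.DECRYPT_MODE, key, spec);
            dec.updateAAD(aad);
            byte[] back = dec.doFinal(ct);

            System.out.println("round trip ok: " + Arrays.equals(plain, back));
        }
    }
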
    22.1 --- a/test/compiler/7184394/TestAESEncode.java	Wed Jul 31 14:28:51 2019 -0400
    22.2 +++ b/test/compiler/7184394/TestAESEncode.java	Wed Jun 17 17:48:25 2015 -0700
    22.3 @@ -1,5 +1,5 @@
    22.4  /*
    22.5 - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
    22.6 + * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
    22.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    22.8   *
    22.9   * This code is free software; you can redistribute it and/or modify it
   22.10 @@ -32,7 +32,11 @@
   22.11    @Override
   22.12    public void run() {
   22.13      try {
   22.14 -      if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
   22.15 +      if (mode.equals("GCM")) {
   22.16 +        gcm_init();
   22.17 +      } else if (!noReinit) {
   22.18 +        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
   22.19 +      }
   22.20        encode = new byte[encodeLength];
   22.21        if (testingMisalignment) {
   22.22          int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
    23.1 --- a/test/compiler/7184394/TestAESMain.java	Wed Jul 31 14:28:51 2019 -0400
    23.2 +++ b/test/compiler/7184394/TestAESMain.java	Wed Jun 17 17:48:25 2015 -0700
    23.3 @@ -41,6 +41,13 @@
    23.4   * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
    23.5   * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
    23.6   * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
    23.7 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain
    23.8 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain
    23.9 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain
   23.10 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain
   23.11 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
   23.12 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
   23.13 + * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
   23.14   *
   23.15   * @author Tom Deneau
   23.16   */
