8035968: Leverage CPU Instructions to Improve SHA Performance on SPARC

Wed, 11 Jun 2014 11:05:10 -0700

author
kvn
date
Wed, 11 Jun 2014 11:05:10 -0700
changeset 7027
b20a35eae442
parent 7026
922c87c9aed4
child 7029
f5b4600d7368

8035968: Leverage CPU Instructions to Improve SHA Performance on SPARC
Summary: Add C2 SHA intrinsics on SPARC
Reviewed-by: kvn, roland
Contributed-by: james.cheng@oracle.com

src/cpu/sparc/vm/assembler_sparc.hpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/stubGenerator_sparc.cpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/stubRoutines_sparc.hpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/vm_version_sparc.cpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/vm_version_sparc.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp file | annotate | diff | comparison | revisions
src/share/vm/classfile/vmSymbols.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/escape.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/stubRoutines.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/stubRoutines.hpp file | annotate | diff | comparison | revisions
test/compiler/intrinsics/sha/TestSHA.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp	Tue Jun 10 12:28:06 2014 -0700
     1.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jun 11 11:05:10 2014 -0700
     1.3 @@ -123,6 +123,7 @@
     1.4      fpop2_op3    = 0x35,
     1.5      impdep1_op3  = 0x36,
     1.6      aes3_op3     = 0x36,
     1.7 +    sha_op3      = 0x36,
     1.8      alignaddr_op3  = 0x36,
     1.9      faligndata_op3 = 0x36,
    1.10      flog3_op3    = 0x36,
    1.11 @@ -223,7 +224,11 @@
    1.12      mwtos_opf          = 0x119,
    1.13  
    1.14      aes_kexpand0_opf   = 0x130,
    1.15 -    aes_kexpand2_opf   = 0x131
    1.16 +    aes_kexpand2_opf   = 0x131,
    1.17 +
    1.18 +    sha1_opf           = 0x141,
    1.19 +    sha256_opf         = 0x142,
    1.20 +    sha512_opf         = 0x143
    1.21    };
    1.22  
    1.23    enum op5s {
    1.24 @@ -595,6 +600,11 @@
    1.25    // AES crypto instructions supported only on certain processors
    1.26    static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
    1.27  
    1.28 +  // SHA crypto instructions supported only on certain processors
    1.29 +  static void sha1_only()   { assert( VM_Version::has_sha1(),   "This instruction only works on SPARC with SHA1"); }
    1.30 +  static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
    1.31 +  static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
    1.32 +
    1.33    // instruction only in VIS1
    1.34    static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
    1.35  
    1.36 @@ -1179,7 +1189,6 @@
    1.37                                                 u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
    1.38    inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
    1.39  
    1.40 -
    1.41    //  VIS1 instructions
    1.42  
    1.43    void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
    1.44 @@ -1203,6 +1212,12 @@
    1.45    void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
    1.46    void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
    1.47  
    1.48 +  // Crypto SHA instructions
    1.49 +
    1.50 +  void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
    1.51 +  void sha256() { sha256_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
    1.52 +  void sha512() { sha512_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
    1.53 +
    1.54    // Creation
    1.55    Assembler(CodeBuffer* code) : AbstractAssembler(code) {
    1.56  #ifdef CHECK_DELAY
     2.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Jun 10 12:28:06 2014 -0700
     2.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jun 11 11:05:10 2014 -0700
     2.3 @@ -4575,6 +4575,219 @@
     2.4      return start;
     2.5    }
     2.6  
     2.7 +  address generate_sha1_implCompress(bool multi_block, const char *name) {
     2.8 +    __ align(CodeEntryAlignment);
     2.9 +    StubCodeMark mark(this, "StubRoutines", name);
    2.10 +    address start = __ pc();
    2.11 +
    2.12 +    Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop;
    2.13 +    int i;
    2.14 +
    2.15 +    Register buf   = O0; // byte[] source+offset
    2.16 +    Register state = O1; // int[]  SHA.state
    2.17 +    Register ofs   = O2; // int    offset
    2.18 +    Register limit = O3; // int    limit
    2.19 +
    2.20 +    // load state into F0-F4
    2.21 +    for (i = 0; i < 5; i++) {
    2.22 +      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
    2.23 +    }
    2.24 +
    2.25 +    __ andcc(buf, 7, G0);
    2.26 +    __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input);
    2.27 +    __ delayed()->nop();
    2.28 +
    2.29 +    __ BIND(L_sha1_loop);
    2.30 +    // load buf into F8-F22
    2.31 +    for (i = 0; i < 8; i++) {
    2.32 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
    2.33 +    }
    2.34 +    __ sha1();
    2.35 +    if (multi_block) {
    2.36 +      __ add(ofs, 64, ofs);
    2.37 +      __ add(buf, 64, buf);
    2.38 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop);
    2.39 +      __ mov(ofs, O0); // to be returned
    2.40 +    }
    2.41 +
    2.42 +    // store F0-F4 into state and return
    2.43 +    for (i = 0; i < 4; i++) {
    2.44 +      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
    2.45 +    }
    2.46 +    __ retl();
    2.47 +    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
    2.48 +
    2.49 +    __ BIND(L_sha1_unaligned_input);
    2.50 +    __ alignaddr(buf, G0, buf);
    2.51 +
    2.52 +    __ BIND(L_sha1_unaligned_input_loop);
    2.53 +    // load buf into F8-F24 (one extra doubleword so faligndata can realign the unaligned input)
    2.54 +    for (i = 0; i < 9; i++) {
    2.55 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
    2.56 +    }
    2.57 +    for (i = 0; i < 8; i++) {
    2.58 +      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
    2.59 +    }
    2.60 +    __ sha1();
    2.61 +    if (multi_block) {
    2.62 +      __ add(ofs, 64, ofs);
    2.63 +      __ add(buf, 64, buf);
    2.64 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop);
    2.65 +      __ mov(ofs, O0); // to be returned
    2.66 +    }
    2.67 +
    2.68 +    // store F0-F4 into state and return
    2.69 +    for (i = 0; i < 4; i++) {
    2.70 +      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
    2.71 +    }
    2.72 +    __ retl();
    2.73 +    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
    2.74 +
    2.75 +    return start;
    2.76 +  }
    2.77 +
    2.78 +  address generate_sha256_implCompress(bool multi_block, const char *name) {
    2.79 +    __ align(CodeEntryAlignment);
    2.80 +    StubCodeMark mark(this, "StubRoutines", name);
    2.81 +    address start = __ pc();
    2.82 +
    2.83 +    Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop;
    2.84 +    int i;
    2.85 +
    2.86 +    Register buf   = O0; // byte[] source+offset
    2.87 +    Register state = O1; // int[]  SHA2.state
    2.88 +    Register ofs   = O2; // int    offset
    2.89 +    Register limit = O3; // int    limit
    2.90 +
    2.91 +    // load state into F0-F7
    2.92 +    for (i = 0; i < 8; i++) {
    2.93 +      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
    2.94 +    }
    2.95 +
    2.96 +    __ andcc(buf, 7, G0);
    2.97 +    __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input);
    2.98 +    __ delayed()->nop();
    2.99 +
   2.100 +    __ BIND(L_sha256_loop);
   2.101 +    // load buf into F8-F22
   2.102 +    for (i = 0; i < 8; i++) {
   2.103 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
   2.104 +    }
   2.105 +    __ sha256();
   2.106 +    if (multi_block) {
   2.107 +      __ add(ofs, 64, ofs);
   2.108 +      __ add(buf, 64, buf);
   2.109 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop);
   2.110 +      __ mov(ofs, O0); // to be returned
   2.111 +    }
   2.112 +
   2.113 +    // store F0-F7 into state and return
   2.114 +    for (i = 0; i < 7; i++) {
   2.115 +      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
   2.116 +    }
   2.117 +    __ retl();
   2.118 +    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
   2.119 +
   2.120 +    __ BIND(L_sha256_unaligned_input);
   2.121 +    __ alignaddr(buf, G0, buf);
   2.122 +
   2.123 +    __ BIND(L_sha256_unaligned_input_loop);
   2.124 +    // load buf into F8-F24 (one extra doubleword so faligndata can realign the unaligned input)
   2.125 +    for (i = 0; i < 9; i++) {
   2.126 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
   2.127 +    }
   2.128 +    for (i = 0; i < 8; i++) {
   2.129 +      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
   2.130 +    }
   2.131 +    __ sha256();
   2.132 +    if (multi_block) {
   2.133 +      __ add(ofs, 64, ofs);
   2.134 +      __ add(buf, 64, buf);
   2.135 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop);
   2.136 +      __ mov(ofs, O0); // to be returned
   2.137 +    }
   2.138 +
   2.139 +    // store F0-F7 into state and return
   2.140 +    for (i = 0; i < 7; i++) {
   2.141 +      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
   2.142 +    }
   2.143 +    __ retl();
   2.144 +    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
   2.145 +
   2.146 +    return start;
   2.147 +  }
   2.148 +
   2.149 +  address generate_sha512_implCompress(bool multi_block, const char *name) {
   2.150 +    __ align(CodeEntryAlignment);
   2.151 +    StubCodeMark mark(this, "StubRoutines", name);
   2.152 +    address start = __ pc();
   2.153 +
   2.154 +    Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop;
   2.155 +    int i;
   2.156 +
   2.157 +    Register buf   = O0; // byte[] source+offset
   2.158 +    Register state = O1; // long[] SHA5.state
   2.159 +    Register ofs   = O2; // int    offset
   2.160 +    Register limit = O3; // int    limit
   2.161 +
   2.162 +    // load state into F0-F14
   2.163 +    for (i = 0; i < 8; i++) {
   2.164 +      __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2));
   2.165 +    }
   2.166 +
   2.167 +    __ andcc(buf, 7, G0);
   2.168 +    __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input);
   2.169 +    __ delayed()->nop();
   2.170 +
   2.171 +    __ BIND(L_sha512_loop);
   2.172 +    // load buf into F16-F46
   2.173 +    for (i = 0; i < 16; i++) {
   2.174 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
   2.175 +    }
   2.176 +    __ sha512();
   2.177 +    if (multi_block) {
   2.178 +      __ add(ofs, 128, ofs);
   2.179 +      __ add(buf, 128, buf);
   2.180 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop);
   2.181 +      __ mov(ofs, O0); // to be returned
   2.182 +    }
   2.183 +
   2.184 +    // store F0-F14 into state and return
   2.185 +    for (i = 0; i < 7; i++) {
   2.186 +      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
   2.187 +    }
   2.188 +    __ retl();
   2.189 +    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
   2.190 +
   2.191 +    __ BIND(L_sha512_unaligned_input);
   2.192 +    __ alignaddr(buf, G0, buf);
   2.193 +
   2.194 +    __ BIND(L_sha512_unaligned_input_loop);
   2.195 +    // load buf into F16-F48 (one extra doubleword so faligndata can realign the unaligned input)
   2.196 +    for (i = 0; i < 17; i++) {
   2.197 +      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
   2.198 +    }
   2.199 +    for (i = 0; i < 16; i++) {
   2.200 +      __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16));
   2.201 +    }
   2.202 +    __ sha512();
   2.203 +    if (multi_block) {
   2.204 +      __ add(ofs, 128, ofs);
   2.205 +      __ add(buf, 128, buf);
   2.206 +      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop);
   2.207 +      __ mov(ofs, O0); // to be returned
   2.208 +    }
   2.209 +
   2.210 +    // store F0-F14 into state and return
   2.211 +    for (i = 0; i < 7; i++) {
   2.212 +      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
   2.213 +    }
   2.214 +    __ retl();
   2.215 +    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
   2.216 +
   2.217 +    return start;
   2.218 +  }
   2.219 +
   2.220    void generate_initial() {
   2.221      // Generates all stubs and initializes the entry points
   2.222  
   2.223 @@ -4647,6 +4860,20 @@
   2.224        StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
   2.225        StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
   2.226      }
   2.227 +
   2.228 +    // generate SHA1/SHA256/SHA512 intrinsics code
   2.229 +    if (UseSHA1Intrinsics) {
   2.230 +      StubRoutines::_sha1_implCompress     = generate_sha1_implCompress(false,   "sha1_implCompress");
   2.231 +      StubRoutines::_sha1_implCompressMB   = generate_sha1_implCompress(true,    "sha1_implCompressMB");
   2.232 +    }
   2.233 +    if (UseSHA256Intrinsics) {
   2.234 +      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
   2.235 +      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
   2.236 +    }
   2.237 +    if (UseSHA512Intrinsics) {
   2.238 +      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
   2.239 +      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
   2.240 +    }
   2.241    }
   2.242  
   2.243  
     3.1 --- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Tue Jun 10 12:28:06 2014 -0700
     3.2 +++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Wed Jun 11 11:05:10 2014 -0700
     3.3 @@ -41,7 +41,7 @@
     3.4  enum /* platform_dependent_constants */ {
     3.5    // %%%%%%%% May be able to shrink this a lot
     3.6    code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
     3.7 -  code_size2 = 22000            // simply increase if too small (assembler will crash if too small)
     3.8 +  code_size2 = 23000            // simply increase if too small (assembler will crash if too small)
     3.9  };
    3.10  
    3.11  class Sparc {
     4.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Jun 10 12:28:06 2014 -0700
     4.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Jun 11 11:05:10 2014 -0700
     4.3 @@ -234,7 +234,7 @@
     4.4    assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
     4.5  
     4.6    char buf[512];
     4.7 -  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.8 +  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.9                 (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
    4.10                 (has_hardware_popc() ? ", popc" : ""),
    4.11                 (has_vis1() ? ", vis1" : ""),
    4.12 @@ -243,6 +243,9 @@
    4.13                 (has_blk_init() ? ", blk_init" : ""),
    4.14                 (has_cbcond() ? ", cbcond" : ""),
    4.15                 (has_aes() ? ", aes" : ""),
    4.16 +               (has_sha1() ? ", sha1" : ""),
    4.17 +               (has_sha256() ? ", sha256" : ""),
    4.18 +               (has_sha512() ? ", sha512" : ""),
    4.19                 (is_ultra3() ? ", ultra3" : ""),
    4.20                 (is_sun4v() ? ", sun4v" : ""),
    4.21                 (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
    4.22 @@ -301,6 +304,58 @@
    4.23      }
    4.24    }
    4.25  
    4.26 +  // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
    4.27 +  if (has_sha1() || has_sha256() || has_sha512()) {
    4.28 +    if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
    4.29 +      if (FLAG_IS_DEFAULT(UseSHA)) {
    4.30 +        FLAG_SET_DEFAULT(UseSHA, true);
    4.31 +      }
    4.32 +    } else {
    4.33 +      if (UseSHA) {
    4.34 +        warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
    4.35 +        FLAG_SET_DEFAULT(UseSHA, false);
    4.36 +      }
    4.37 +    }
    4.38 +  } else if (UseSHA) {
    4.39 +    warning("SHA instructions are not available on this CPU");
    4.40 +    FLAG_SET_DEFAULT(UseSHA, false);
    4.41 +  }
    4.42 +
    4.43 +  if (!UseSHA) {
    4.44 +    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    4.45 +    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    4.46 +    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
    4.47 +  } else {
    4.48 +    if (has_sha1()) {
    4.49 +      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
    4.50 +        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    4.51 +      }
    4.52 +    } else if (UseSHA1Intrinsics) {
    4.53 +      warning("SHA1 instruction is not available on this CPU.");
    4.54 +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    4.55 +    }
    4.56 +    if (has_sha256()) {
    4.57 +      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
    4.58 +        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    4.59 +      }
    4.60 +    } else if (UseSHA256Intrinsics) {
    4.61 +      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
    4.62 +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    4.63 +    }
    4.64 +
    4.65 +    if (has_sha512()) {
    4.66 +      if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
    4.67 +        FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    4.68 +      }
    4.69 +    } else if (UseSHA512Intrinsics) {
    4.70 +      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
    4.71 +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
    4.72 +    }
    4.73 +    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    4.74 +      FLAG_SET_DEFAULT(UseSHA, false);
    4.75 +    }
    4.76 +  }
    4.77 +
    4.78    if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
    4.79      (cache_line_size > ContendedPaddingWidth))
    4.80      ContendedPaddingWidth = cache_line_size;
     5.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Tue Jun 10 12:28:06 2014 -0700
     5.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Jun 11 11:05:10 2014 -0700
     5.3 @@ -1,5 +1,5 @@
     5.4  /*
     5.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
     5.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     5.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5.8   *
     5.9   * This code is free software; you can redistribute it and/or modify it
    5.10 @@ -50,7 +50,10 @@
    5.11      T_family             = 16,
    5.12      T1_model             = 17,
    5.13      sparc5_instructions  = 18,
    5.14 -    aes_instructions     = 19
    5.15 +    aes_instructions     = 19,
    5.16 +    sha1_instruction     = 20,
    5.17 +    sha256_instruction   = 21,
    5.18 +    sha512_instruction   = 22
    5.19    };
    5.20  
    5.21    enum Feature_Flag_Set {
    5.22 @@ -77,6 +80,9 @@
    5.23      T1_model_m              = 1 << T1_model,
    5.24      sparc5_instructions_m   = 1 << sparc5_instructions,
    5.25      aes_instructions_m      = 1 << aes_instructions,
    5.26 +    sha1_instruction_m      = 1 << sha1_instruction,
    5.27 +    sha256_instruction_m    = 1 << sha256_instruction,
    5.28 +    sha512_instruction_m    = 1 << sha512_instruction,
    5.29  
    5.30      generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
    5.31      generic_v9_m        = generic_v8_m | v9_instructions_m,
    5.32 @@ -129,6 +135,9 @@
    5.33    static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
    5.34    static bool has_sparc5_instr()        { return (_features & sparc5_instructions_m) != 0; }
    5.35    static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }
    5.36 +  static bool has_sha1()                { return (_features & sha1_instruction_m) != 0; }
    5.37 +  static bool has_sha256()              { return (_features & sha256_instruction_m) != 0; }
    5.38 +  static bool has_sha512()              { return (_features & sha512_instruction_m) != 0; }
    5.39  
    5.40    static bool supports_compare_and_exchange()
    5.41                                          { return has_v9(); }
     6.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jun 10 12:28:06 2014 -0700
     6.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jun 11 11:05:10 2014 -0700
     6.3 @@ -590,6 +590,17 @@
     6.4      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
     6.5    }
     6.6  
     6.7 +  if (UseSHA) {
     6.8 +    warning("SHA instructions are not available on this CPU");
     6.9 +    FLAG_SET_DEFAULT(UseSHA, false);
    6.10 +  }
    6.11 +  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
    6.12 +    warning("SHA intrinsics are not available on this CPU");
    6.13 +    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    6.14 +    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    6.15 +    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
    6.16 +  }
    6.17 +
    6.18    // Adjust RTM (Restricted Transactional Memory) flags
    6.19    if (!supports_rtm() && UseRTMLocking) {
    6.20      // Can't continue because UseRTMLocking affects UseBiasedLocking flag
     7.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Tue Jun 10 12:28:06 2014 -0700
     7.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Jun 11 11:05:10 2014 -0700
     7.3 @@ -1,5 +1,5 @@
     7.4  /*
     7.5 - * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
     7.6 + * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
     7.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     7.8   *
     7.9   * This code is free software; you can redistribute it and/or modify it
    7.10 @@ -137,6 +137,21 @@
    7.11  #endif
    7.12      if (av & AV_SPARC_AES)       features |= aes_instructions_m;
    7.13  
    7.14 +#ifndef AV_SPARC_SHA1
    7.15 +#define AV_SPARC_SHA1   0x00400000  /* sha1 instruction supported */
    7.16 +#endif
    7.17 +    if (av & AV_SPARC_SHA1)         features |= sha1_instruction_m;
    7.18 +
    7.19 +#ifndef AV_SPARC_SHA256
    7.20 +#define AV_SPARC_SHA256 0x00800000  /* sha256 instruction supported */
    7.21 +#endif
    7.22 +    if (av & AV_SPARC_SHA256)       features |= sha256_instruction_m;
    7.23 +
    7.24 +#ifndef AV_SPARC_SHA512
    7.25 +#define AV_SPARC_SHA512 0x01000000  /* sha512 instruction supported */
    7.26 +#endif
    7.27 +    if (av & AV_SPARC_SHA512)       features |= sha512_instruction_m;
    7.28 +
    7.29    } else {
    7.30      // getisax(2) failed, use the old legacy code.
    7.31  #ifndef PRODUCT
     8.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Tue Jun 10 12:28:06 2014 -0700
     8.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Jun 11 11:05:10 2014 -0700
     8.3 @@ -789,6 +789,26 @@
     8.4     do_name(     decrypt_name,                                      "decrypt")                                           \
     8.5     do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
     8.6                                                                                                                          \
     8.7 +  /* support for sun.security.provider.SHA */                                                                           \
     8.8 +  do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
     8.9 +  do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
    8.10 +   do_name(     implCompress_name,                                 "implCompress")                                      \
    8.11 +   do_signature(implCompress_signature,                            "([BI)V")                                            \
    8.12 +                                                                                                                        \
    8.13 +  /* support for sun.security.provider.SHA2 */                                                                          \
    8.14 +  do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
    8.15 +  do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
    8.16 +                                                                                                                        \
    8.17 +  /* support for sun.security.provider.SHA5 */                                                                          \
    8.18 +  do_class(sun_security_provider_sha5,                             "sun/security/provider/SHA5")                        \
    8.19 +  do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R)          \
    8.20 +                                                                                                                        \
    8.21 +  /* support for sun.security.provider.DigestBase */                                                                    \
    8.22 +  do_class(sun_security_provider_digestbase,                       "sun/security/provider/DigestBase")                  \
    8.23 +  do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R)   \
    8.24 +   do_name(     implCompressMB_name,                               "implCompressMultiBlock")                            \
    8.25 +   do_signature(implCompressMB_signature,                          "([BII)I")                                           \
    8.26 +                                                                                                                        \
    8.27    /* support for java.util.zip */                                                                                       \
    8.28    do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
    8.29    do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
     9.1 --- a/src/share/vm/opto/escape.cpp	Tue Jun 10 12:28:06 2014 -0700
     9.2 +++ b/src/share/vm/opto/escape.cpp	Wed Jun 11 11:05:10 2014 -0700
     9.3 @@ -1,5 +1,5 @@
     9.4  /*
     9.5 - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
     9.6 + * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
     9.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     9.8   *
     9.9   * This code is free software; you can redistribute it and/or modify it
    9.10 @@ -938,7 +938,13 @@
    9.11                    strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
    9.12                    strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
    9.13                    strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
    9.14 -                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
    9.15 +                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
    9.16 +                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
    9.17 +                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
    9.18 +                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
    9.19 +                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
    9.20 +                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
    9.21 +                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0)
    9.22                    ))) {
    9.23              call->dump();
    9.24              fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
    10.1 --- a/src/share/vm/opto/library_call.cpp	Tue Jun 10 12:28:06 2014 -0700
    10.2 +++ b/src/share/vm/opto/library_call.cpp	Wed Jun 11 11:05:10 2014 -0700
    10.3 @@ -310,6 +310,14 @@
    10.4    Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
    10.5    Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
    10.6    Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
    10.7 +  bool inline_sha_implCompress(vmIntrinsics::ID id);
    10.8 +  bool inline_digestBase_implCompressMB(int predicate);
    10.9 +  bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
   10.10 +                                 bool long_state, address stubAddr, const char *stubName,
   10.11 +                                 Node* src_start, Node* ofs, Node* limit);
   10.12 +  Node* get_state_from_sha_object(Node *sha_object);
   10.13 +  Node* get_state_from_sha5_object(Node *sha_object);
   10.14 +  Node* inline_digestBase_implCompressMB_predicate(int predicate);
   10.15    bool inline_encodeISOArray();
   10.16    bool inline_updateCRC32();
   10.17    bool inline_updateBytesCRC32();
   10.18 @@ -514,6 +522,23 @@
   10.19      predicates = 1;
   10.20      break;
   10.21  
   10.22 +  case vmIntrinsics::_sha_implCompress:
   10.23 +    if (!UseSHA1Intrinsics) return NULL;
   10.24 +    break;
   10.25 +
   10.26 +  case vmIntrinsics::_sha2_implCompress:
   10.27 +    if (!UseSHA256Intrinsics) return NULL;
   10.28 +    break;
   10.29 +
   10.30 +  case vmIntrinsics::_sha5_implCompress:
   10.31 +    if (!UseSHA512Intrinsics) return NULL;
   10.32 +    break;
   10.33 +
   10.34 +  case vmIntrinsics::_digestBase_implCompressMB:
   10.35 +    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
   10.36 +    predicates = 3;
   10.37 +    break;
   10.38 +
   10.39    case vmIntrinsics::_updateCRC32:
   10.40    case vmIntrinsics::_updateBytesCRC32:
   10.41    case vmIntrinsics::_updateByteBufferCRC32:
   10.42 @@ -879,6 +904,14 @@
   10.43    case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
   10.44      return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
   10.45  
   10.46 +  case vmIntrinsics::_sha_implCompress:
   10.47 +  case vmIntrinsics::_sha2_implCompress:
   10.48 +  case vmIntrinsics::_sha5_implCompress:
   10.49 +    return inline_sha_implCompress(intrinsic_id());
   10.50 +
   10.51 +  case vmIntrinsics::_digestBase_implCompressMB:
   10.52 +    return inline_digestBase_implCompressMB(predicate);
   10.53 +
   10.54    case vmIntrinsics::_encodeISOArray:
   10.55      return inline_encodeISOArray();
   10.56  
   10.57 @@ -916,6 +949,8 @@
   10.58      return inline_cipherBlockChaining_AESCrypt_predicate(false);
   10.59    case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
   10.60      return inline_cipherBlockChaining_AESCrypt_predicate(true);
   10.61 +  case vmIntrinsics::_digestBase_implCompressMB:
   10.62 +    return inline_digestBase_implCompressMB_predicate(predicate);
   10.63  
   10.64    default:
   10.65      // If you get here, it may be that someone has added a new intrinsic
   10.66 @@ -6128,3 +6163,258 @@
   10.67    record_for_igvn(region);
   10.68    return _gvn.transform(region);
   10.69  }
   10.70 +
   10.71 +//------------------------------inline_sha_implCompress-----------------------
   10.72 +//
   10.73 +// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
   10.74 +// void sun.security.provider.SHA.implCompress(byte[] buf, int ofs)
   10.75 +//
   10.76 +// Calculate SHA2 (i.e., SHA-224 or SHA-256) for single-block byte[] array.
   10.77 +// void sun.security.provider.SHA2.implCompress(byte[] buf, int ofs)
   10.78 +//
   10.79 +// Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array.
   10.80 +// void sun.security.provider.SHA5.implCompress(byte[] buf, int ofs)
   10.81 +//
   10.82 +bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) {
   10.83 +  assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters");
   10.84 +
   10.85 +  Node* sha_obj = argument(0);
   10.86 +  Node* src     = argument(1); // type oop
   10.87 +  Node* ofs     = argument(2); // type int
   10.88 +
   10.89 +  const Type* src_type = src->Value(&_gvn);
   10.90 +  const TypeAryPtr* top_src = src_type->isa_aryptr();
   10.91 +  if (top_src  == NULL || top_src->klass()  == NULL) {
   10.92 +    // failed array check
   10.93 +    return false;
   10.94 +  }
   10.95 +  // Determine the element type of the source array; only byte[] is supported.
   10.96 +  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
   10.97 +  if (src_elem != T_BYTE) {
   10.98 +    return false;
   10.99 +  }
  10.100 +  // 'src_start' points to src array + offset
  10.101 +  Node* src_start = array_element_address(src, ofs, src_elem);
  10.102 +  Node* state = NULL;
  10.103 +  address stubAddr;
  10.104 +  const char *stubName;
  10.105 +
  10.106 +  switch(id) {
  10.107 +  case vmIntrinsics::_sha_implCompress:
  10.108 +    assert(UseSHA1Intrinsics, "need SHA1 instruction support");
  10.109 +    state = get_state_from_sha_object(sha_obj);
  10.110 +    stubAddr = StubRoutines::sha1_implCompress();
  10.111 +    stubName = "sha1_implCompress";
  10.112 +    break;
  10.113 +  case vmIntrinsics::_sha2_implCompress:
  10.114 +    assert(UseSHA256Intrinsics, "need SHA256 instruction support");
  10.115 +    state = get_state_from_sha_object(sha_obj);
  10.116 +    stubAddr = StubRoutines::sha256_implCompress();
  10.117 +    stubName = "sha256_implCompress";
  10.118 +    break;
  10.119 +  case vmIntrinsics::_sha5_implCompress:
  10.120 +    assert(UseSHA512Intrinsics, "need SHA512 instruction support");
  10.121 +    state = get_state_from_sha5_object(sha_obj);
  10.122 +    stubAddr = StubRoutines::sha512_implCompress();
  10.123 +    stubName = "sha512_implCompress";
  10.124 +    break;
  10.125 +  default:
  10.126 +    fatal_unexpected_iid(id);
  10.127 +    return false;
  10.128 +  }
  10.129 +  if (state == NULL) return false;
  10.130 +
  10.131 +  // Call the stub.
  10.132 +  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(),
  10.133 +                                 stubAddr, stubName, TypePtr::BOTTOM,
  10.134 +                                 src_start, state);
  10.135 +
  10.136 +  return true;
  10.137 +}
  10.138 +
  10.139 +//------------------------------inline_digestBase_implCompressMB-----------------------
  10.140 +//
  10.141 +// Calculate SHA/SHA2/SHA5 for multi-block byte[] array.
  10.142 +// int sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  10.143 +//
  10.144 +bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
  10.145 +  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
  10.146 +         "need SHA1/SHA256/SHA512 instruction support");
  10.147 +  assert((uint)predicate < 3, "sanity");
  10.148 +  assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters");
  10.149 +
  10.150 +  Node* digestBase_obj = argument(0); // The receiver was checked for NULL already.
  10.151 +  Node* src            = argument(1); // byte[] array
  10.152 +  Node* ofs            = argument(2); // type int
  10.153 +  Node* limit          = argument(3); // type int
  10.154 +
  10.155 +  const Type* src_type = src->Value(&_gvn);
  10.156 +  const TypeAryPtr* top_src = src_type->isa_aryptr();
  10.157 +  if (top_src  == NULL || top_src->klass()  == NULL) {
  10.158 +    // failed array check
  10.159 +    return false;
  10.160 +  }
  10.161 +  // Determine the element type of the source array; only byte[] is supported.
  10.162 +  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
  10.163 +  if (src_elem != T_BYTE) {
  10.164 +    return false;
  10.165 +  }
  10.166 +  // 'src_start' points to src array + offset
  10.167 +  Node* src_start = array_element_address(src, ofs, src_elem);
  10.168 +
  10.169 +  const char* klass_SHA_name = NULL;
  10.170 +  const char* stub_name = NULL;
  10.171 +  address     stub_addr = NULL;
  10.172 +  bool        long_state = false;
  10.173 +
  10.174 +  switch (predicate) {
  10.175 +  case 0:
  10.176 +    if (UseSHA1Intrinsics) {
  10.177 +      klass_SHA_name = "sun/security/provider/SHA";
  10.178 +      stub_name = "sha1_implCompressMB";
  10.179 +      stub_addr = StubRoutines::sha1_implCompressMB();
  10.180 +    }
  10.181 +    break;
  10.182 +  case 1:
  10.183 +    if (UseSHA256Intrinsics) {
  10.184 +      klass_SHA_name = "sun/security/provider/SHA2";
  10.185 +      stub_name = "sha256_implCompressMB";
  10.186 +      stub_addr = StubRoutines::sha256_implCompressMB();
  10.187 +    }
  10.188 +    break;
  10.189 +  case 2:
  10.190 +    if (UseSHA512Intrinsics) {
  10.191 +      klass_SHA_name = "sun/security/provider/SHA5";
  10.192 +      stub_name = "sha512_implCompressMB";
  10.193 +      stub_addr = StubRoutines::sha512_implCompressMB();
  10.194 +      long_state = true;
  10.195 +    }
  10.196 +    break;
  10.197 +  default:
  10.198 +    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
  10.199 +  }
  10.200 +  if (klass_SHA_name != NULL) {
  10.201 +    // get the DigestBase klass, which is used to look up the SHA klass
  10.202 +    const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr();
  10.203 +    assert(tinst != NULL, "digestBase_obj is not instance???");
  10.204 +    assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
  10.205 +
  10.206 +    ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
  10.207 +    assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded");
  10.208 +    ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
  10.209 +    return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit);
  10.210 +  }
  10.211 +  return false;
  10.212 +}
  10.213 +//------------------------------inline_sha_implCompressMB-----------------------
  10.214 +bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA,
  10.215 +                                               bool long_state, address stubAddr, const char *stubName,
  10.216 +                                               Node* src_start, Node* ofs, Node* limit) {
  10.217 +  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA);
  10.218 +  const TypeOopPtr* xtype = aklass->as_instance_type();
  10.219 +  Node* sha_obj = new (C) CheckCastPPNode(control(), digestBase_obj, xtype);
  10.220 +  sha_obj = _gvn.transform(sha_obj);
  10.221 +
  10.222 +  Node* state;
  10.223 +  if (long_state) {
  10.224 +    state = get_state_from_sha5_object(sha_obj);
  10.225 +  } else {
  10.226 +    state = get_state_from_sha_object(sha_obj);
  10.227 +  }
  10.228 +  if (state == NULL) return false;
  10.229 +
  10.230 +  // Call the stub.
  10.231 +  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
  10.232 +                                 OptoRuntime::digestBase_implCompressMB_Type(),
  10.233 +                                 stubAddr, stubName, TypePtr::BOTTOM,
  10.234 +                                 src_start, state, ofs, limit);
  10.235 +  // return ofs (int)
  10.236 +  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
  10.237 +  set_result(result);
  10.238 +
  10.239 +  return true;
  10.240 +}
  10.241 +
  10.242 +//------------------------------get_state_from_sha_object-----------------------
  10.243 +Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) {
  10.244 +  Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false);
  10.245 +  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2");
  10.246 +  if (sha_state == NULL) return (Node *) NULL;
  10.247 +
  10.248 +  // now have the array, need to get the start address of the state array
  10.249 +  Node* state = array_element_address(sha_state, intcon(0), T_INT);
  10.250 +  return state;
  10.251 +}
  10.252 +
  10.253 +//------------------------------get_state_from_sha5_object-----------------------
  10.254 +Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) {
  10.255 +  Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false);
  10.256 +  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5");
  10.257 +  if (sha_state == NULL) return (Node *) NULL;
  10.258 +
  10.259 +  // now have the array, need to get the start address of the state array
  10.260 +  Node* state = array_element_address(sha_state, intcon(0), T_LONG);
  10.261 +  return state;
  10.262 +}
  10.263 +
  10.264 +//----------------------------inline_digestBase_implCompressMB_predicate----------------------------
  10.265 +// Return the node representing the slow path of the predicate check.
  10.266 +// The pseudo code we want to emulate with this predicate is:
  10.267 +//    if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath
  10.268 +//
  10.269 +Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) {
  10.270 +  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
  10.271 +         "need SHA1/SHA256/SHA512 instruction support");
  10.272 +  assert((uint)predicate < 3, "sanity");
  10.273 +
  10.274 +  // The receiver was checked for NULL already.
  10.275 +  Node* digestBaseObj = argument(0);
  10.276 +
  10.277 +  // get DigestBase klass for instanceOf check
  10.278 +  const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr();
  10.279 +  assert(tinst != NULL, "digestBaseObj is null");
  10.280 +  assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
  10.281 +
  10.282 +  const char* klass_SHA_name = NULL;
  10.283 +  switch (predicate) {
  10.284 +  case 0:
  10.285 +    if (UseSHA1Intrinsics) {
  10.286 +      // we want to do an instanceof comparison against the SHA class
  10.287 +      klass_SHA_name = "sun/security/provider/SHA";
  10.288 +    }
  10.289 +    break;
  10.290 +  case 1:
  10.291 +    if (UseSHA256Intrinsics) {
  10.292 +      // we want to do an instanceof comparison against the SHA2 class
  10.293 +      klass_SHA_name = "sun/security/provider/SHA2";
  10.294 +    }
  10.295 +    break;
  10.296 +  case 2:
  10.297 +    if (UseSHA512Intrinsics) {
  10.298 +      // we want to do an instanceof comparison against the SHA5 class
  10.299 +      klass_SHA_name = "sun/security/provider/SHA5";
  10.300 +    }
  10.301 +    break;
  10.302 +  default:
  10.303 +    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
  10.304 +  }
  10.305 +
  10.306 +  ciKlass* klass_SHA = NULL;
  10.307 +  if (klass_SHA_name != NULL) {
  10.308 +    klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
  10.309 +  }
  10.310 +  if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) {
  10.311 +    // if the SHA class for this predicate is not loaded (or its intrinsic is disabled), we never take the intrinsic fast path
  10.312 +    Node* ctrl = control();
  10.313 +    set_control(top()); // no intrinsic path
  10.314 +    return ctrl;
  10.315 +  }
  10.316 +  ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
  10.317 +
  10.318 +  Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA)));
  10.319 +  Node* cmp_instof = _gvn.transform(new (C) CmpINode(instofSHA, intcon(1)));
  10.320 +  Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
  10.321 +  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
  10.322 +
  10.323 +  return instof_false;  // even if it is NULL
  10.324 +}
    11.1 --- a/src/share/vm/opto/runtime.cpp	Tue Jun 10 12:28:06 2014 -0700
    11.2 +++ b/src/share/vm/opto/runtime.cpp	Wed Jun 11 11:05:10 2014 -0700
    11.3 @@ -898,6 +898,50 @@
    11.4    return TypeFunc::make(domain, range);
    11.5  }
    11.6  
    11.7 +/*
    11.8 + * void implCompress(byte[] buf, int ofs)
    11.9 + */
   11.10 +const TypeFunc* OptoRuntime::sha_implCompress_Type() {
   11.11 +  // create input type (domain)
   11.12 +  int num_args = 2;
   11.13 +  int argcnt = num_args;
   11.14 +  const Type** fields = TypeTuple::fields(argcnt);
   11.15 +  int argp = TypeFunc::Parms;
   11.16 +  fields[argp++] = TypePtr::NOTNULL; // buf
   11.17 +  fields[argp++] = TypePtr::NOTNULL; // state
   11.18 +  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   11.19 +  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
   11.20 +
   11.21 +  // no result type needed
   11.22 +  fields = TypeTuple::fields(1);
   11.23 +  fields[TypeFunc::Parms+0] = NULL; // void
   11.24 +  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
   11.25 +  return TypeFunc::make(domain, range);
   11.26 +}
   11.27 +
   11.28 +/*
   11.29 + * int implCompressMultiBlock(byte[] b, int ofs, int limit)
   11.30 + */
   11.31 +const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
   11.32 +  // create input type (domain)
   11.33 +  int num_args = 4;
   11.34 +  int argcnt = num_args;
   11.35 +  const Type** fields = TypeTuple::fields(argcnt);
   11.36 +  int argp = TypeFunc::Parms;
   11.37 +  fields[argp++] = TypePtr::NOTNULL; // buf
   11.38 +  fields[argp++] = TypePtr::NOTNULL; // state
   11.39 +  fields[argp++] = TypeInt::INT;     // ofs
   11.40 +  fields[argp++] = TypeInt::INT;     // limit
   11.41 +  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   11.42 +  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
   11.43 +
   11.44 +  // returning ofs (int)
   11.45 +  fields = TypeTuple::fields(1);
   11.46 +  fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
   11.47 +  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
   11.48 +  return TypeFunc::make(domain, range);
   11.49 +}
   11.50 +
   11.51  //------------- Interpreter state access for on stack replacement
   11.52  const TypeFunc* OptoRuntime::osr_end_Type() {
   11.53    // create input type (domain)
    12.1 --- a/src/share/vm/opto/runtime.hpp	Tue Jun 10 12:28:06 2014 -0700
    12.2 +++ b/src/share/vm/opto/runtime.hpp	Wed Jun 11 11:05:10 2014 -0700
    12.3 @@ -1,5 +1,5 @@
    12.4  /*
    12.5 - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
    12.6 + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
    12.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    12.8   *
    12.9   * This code is free software; you can redistribute it and/or modify it
   12.10 @@ -300,6 +300,9 @@
   12.11    static const TypeFunc* aescrypt_block_Type();
   12.12    static const TypeFunc* cipherBlockChaining_aescrypt_Type();
   12.13  
   12.14 +  static const TypeFunc* sha_implCompress_Type();
   12.15 +  static const TypeFunc* digestBase_implCompressMB_Type();
   12.16 +
   12.17    static const TypeFunc* updateBytesCRC32_Type();
   12.18  
   12.19    // leaf on stack replacement interpreter accessor types
    13.1 --- a/src/share/vm/runtime/globals.hpp	Tue Jun 10 12:28:06 2014 -0700
    13.2 +++ b/src/share/vm/runtime/globals.hpp	Wed Jun 11 11:05:10 2014 -0700
    13.3 @@ -597,6 +597,9 @@
    13.4    product(bool, UseAES, false,                                              \
    13.5            "Control whether AES instructions can be used on x86/x64")        \
    13.6                                                                              \
    13.7 +  product(bool, UseSHA, false,                                              \
    13.8 +          "Control whether SHA instructions can be used on SPARC")          \
    13.9 +                                                                            \
   13.10    product(uintx, LargePageSizeInBytes, 0,                                   \
   13.11            "Large page size (0 to let VM choose the page size)")             \
   13.12                                                                              \
   13.13 @@ -703,6 +706,15 @@
   13.14    product(bool, UseAESIntrinsics, false,                                    \
   13.15            "Use intrinsics for AES versions of crypto")                      \
   13.16                                                                              \
   13.17 +  product(bool, UseSHA1Intrinsics, false,                                   \
   13.18 +          "Use intrinsics for SHA-1 crypto hash function")                  \
   13.19 +                                                                            \
   13.20 +  product(bool, UseSHA256Intrinsics, false,                                 \
   13.21 +          "Use intrinsics for SHA-224 and SHA-256 crypto hash functions")   \
   13.22 +                                                                            \
   13.23 +  product(bool, UseSHA512Intrinsics, false,                                 \
   13.24 +          "Use intrinsics for SHA-384 and SHA-512 crypto hash functions")   \
   13.25 +                                                                            \
   13.26    product(bool, UseCRC32Intrinsics, false,                                  \
   13.27            "use intrinsics for java.util.zip.CRC32")                         \
   13.28                                                                              \
    14.1 --- a/src/share/vm/runtime/stubRoutines.cpp	Tue Jun 10 12:28:06 2014 -0700
    14.2 +++ b/src/share/vm/runtime/stubRoutines.cpp	Wed Jun 11 11:05:10 2014 -0700
    14.3 @@ -1,5 +1,5 @@
    14.4  /*
    14.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
    14.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
    14.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    14.8   *
    14.9   * This code is free software; you can redistribute it and/or modify it
   14.10 @@ -125,6 +125,13 @@
   14.11  address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
   14.12  address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
   14.13  
   14.14 +address StubRoutines::_sha1_implCompress     = NULL;
   14.15 +address StubRoutines::_sha1_implCompressMB   = NULL;
   14.16 +address StubRoutines::_sha256_implCompress   = NULL;
   14.17 +address StubRoutines::_sha256_implCompressMB = NULL;
   14.18 +address StubRoutines::_sha512_implCompress   = NULL;
   14.19 +address StubRoutines::_sha512_implCompressMB = NULL;
   14.20 +
   14.21  address StubRoutines::_updateBytesCRC32 = NULL;
   14.22  address StubRoutines::_crc_table_adr = NULL;
   14.23  
    15.1 --- a/src/share/vm/runtime/stubRoutines.hpp	Tue Jun 10 12:28:06 2014 -0700
    15.2 +++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Jun 11 11:05:10 2014 -0700
    15.3 @@ -1,5 +1,5 @@
    15.4  /*
    15.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
    15.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
    15.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    15.8   *
    15.9   * This code is free software; you can redistribute it and/or modify it
   15.10 @@ -207,6 +207,13 @@
   15.11    static address _cipherBlockChaining_encryptAESCrypt;
   15.12    static address _cipherBlockChaining_decryptAESCrypt;
   15.13  
   15.14 +  static address _sha1_implCompress;
   15.15 +  static address _sha1_implCompressMB;
   15.16 +  static address _sha256_implCompress;
   15.17 +  static address _sha256_implCompressMB;
   15.18 +  static address _sha512_implCompress;
   15.19 +  static address _sha512_implCompressMB;
   15.20 +
   15.21    static address _updateBytesCRC32;
   15.22    static address _crc_table_adr;
   15.23  
   15.24 @@ -356,6 +363,13 @@
   15.25    static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   15.26    static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
   15.27  
   15.28 +  static address sha1_implCompress()     { return _sha1_implCompress; }
   15.29 +  static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
   15.30 +  static address sha256_implCompress()   { return _sha256_implCompress; }
   15.31 +  static address sha256_implCompressMB() { return _sha256_implCompressMB; }
   15.32 +  static address sha512_implCompress()   { return _sha512_implCompress; }
   15.33 +  static address sha512_implCompressMB() { return _sha512_implCompressMB; }
   15.34 +
   15.35    static address updateBytesCRC32()    { return _updateBytesCRC32; }
   15.36    static address crc_table_addr()      { return _crc_table_adr; }
   15.37  
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/test/compiler/intrinsics/sha/TestSHA.java	Wed Jun 11 11:05:10 2014 -0700
    16.3 @@ -0,0 +1,141 @@
    16.4 +/*
    16.5 + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
    16.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    16.7 + *
    16.8 + * This code is free software; you can redistribute it and/or modify it
    16.9 + * under the terms of the GNU General Public License version 2 only, as
   16.10 + * published by the Free Software Foundation.
   16.11 + *
   16.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   16.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   16.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   16.15 + * version 2 for more details (a copy is included in the LICENSE file that
   16.16 + * accompanied this code).
   16.17 + *
   16.18 + * You should have received a copy of the GNU General Public License version
   16.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   16.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   16.21 + *
   16.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   16.23 + * or visit www.oracle.com if you need additional information or have any
   16.24 + * questions.
   16.25 + *
   16.26 + */
   16.27 +
   16.28 +/**
   16.29 + * @test
   16.30 + * @bug 8035968
   16.31 + * @summary C2 support for SHA on SPARC
   16.32 + *
   16.33 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   TestSHA
   16.34 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 TestSHA
   16.35 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 TestSHA
   16.36 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 TestSHA
   16.37 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 TestSHA
   16.38 + *
   16.39 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Doffset=1 TestSHA
   16.40 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 -Doffset=1 TestSHA
   16.41 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Doffset=1 TestSHA
   16.42 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 -Doffset=1 TestSHA
   16.43 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 -Doffset=1 TestSHA
   16.44 + *
   16.45 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-256 TestSHA
   16.46 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-512 TestSHA
   16.47 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Dalgorithm2=SHA-512 TestSHA
   16.48 + *
   16.49 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=MD5     TestSHA
   16.50 + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=MD5     -Dalgorithm2=SHA-1   TestSHA
   16.51 + */
   16.52 +
   16.53 +import java.security.MessageDigest;
   16.54 +import java.util.Arrays;
   16.55 +
   16.56 +public class TestSHA {
   16.57 +    private static final int HASH_LEN = 64; /* up to 512-bit */
   16.58 +    private static final int ALIGN = 8;     /* for different data alignments */
   16.59 +
   16.60 +    public static void main(String[] args) throws Exception {
   16.61 +        String provider = System.getProperty("provider", "SUN");
   16.62 +        String algorithm = System.getProperty("algorithm", "SHA-1");
   16.63 +        String algorithm2 = System.getProperty("algorithm2", "");
   16.64 +        int msgSize = Integer.getInteger("msgSize", 1024);
   16.65 +        int offset = Integer.getInteger("offset", 0)  % ALIGN;
   16.66 +        int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 100000);
   16.67 +        int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000);
   16.68 +
   16.69 +        testSHA(provider, algorithm, msgSize, offset, iters, warmupIters);
   16.70 +
   16.71 +        if (algorithm2.equals("") == false) {
   16.72 +            testSHA(provider, algorithm2, msgSize, offset, iters, warmupIters);
   16.73 +        }
   16.74 +    }
   16.75 +
   16.76 +    static void testSHA(String provider, String algorithm, int msgSize,
   16.77 +                        int offset, int iters, int warmupIters) throws Exception {
   16.78 +        System.out.println("provider = " + provider);
   16.79 +        System.out.println("algorithm = " + algorithm);
   16.80 +        System.out.println("msgSize = " + msgSize + " bytes");
   16.81 +        System.out.println("offset = " + offset);
   16.82 +        System.out.println("iters = " + iters);
   16.83 +
   16.84 +        byte[] expectedHash = new byte[HASH_LEN];
   16.85 +        byte[] hash = new byte[HASH_LEN];
   16.86 +        byte[] data = new byte[msgSize + offset];
   16.87 +        for (int i = 0; i < (msgSize + offset); i++) {
   16.88 +            data[i] = (byte)(i & 0xff);
   16.89 +        }
   16.90 +
   16.91 +        try {
   16.92 +            MessageDigest sha = MessageDigest.getInstance(algorithm, provider);
   16.93 +
   16.94 +            /* do once, which doesn't use intrinsics */
   16.95 +            sha.reset();
   16.96 +            sha.update(data, offset, msgSize);
   16.97 +            expectedHash = sha.digest();
   16.98 +
   16.99 +            /* warm up */
  16.100 +            for (int i = 0; i < warmupIters; i++) {
  16.101 +                sha.reset();
  16.102 +                sha.update(data, offset, msgSize);
  16.103 +                hash = sha.digest();
  16.104 +            }
  16.105 +
  16.106 +            /* check result */
  16.107 +            if (Arrays.equals(hash, expectedHash) == false) {
  16.108 +                System.out.println("TestSHA Error: ");
  16.109 +                showArray(expectedHash, "expectedHash");
  16.110 +                showArray(hash,         "computedHash");
  16.111 +                //System.exit(1);
  16.112 +                throw new Exception("TestSHA Error");
  16.113 +            } else {
  16.114 +                showArray(hash, "hash");
  16.115 +            }
  16.116 +
  16.117 +            /* measure performance */
  16.118 +            long start = System.nanoTime();
  16.119 +            for (int i = 0; i < iters; i++) {
  16.120 +                sha.reset();
  16.121 +                sha.update(data, offset, msgSize);
  16.122 +                hash = sha.digest();
  16.123 +            }
  16.124 +            long end = System.nanoTime();
  16.125 +            double total = (double)(end - start)/1e9;         /* in seconds */
  16.126 +            double thruput = (double)msgSize*iters/1e6/total; /* in MB/s */
  16.127 +            System.out.println("TestSHA runtime = " + total + " seconds");
  16.128 +            System.out.println("TestSHA throughput = " + thruput + " MB/s");
  16.129 +            System.out.println();
  16.130 +        } catch (Exception e) {
  16.131 +            System.out.println("Exception: " + e);
  16.132 +            //System.exit(1);
  16.133 +            throw new Exception(e);
  16.134 +        }
  16.135 +    }
  16.136 +
  16.137 +    static void showArray(byte b[], String name) {
  16.138 +        System.out.format("%s [%d]: ", name, b.length);
  16.139 +        for (int i = 0; i < Math.min(b.length, HASH_LEN); i++) {
  16.140 +            System.out.format("%02x ", b[i] & 0xff);
  16.141 +        }
  16.142 +        System.out.println();
  16.143 +    }
  16.144 +}

mercurial