8130150: Implement BigInteger.montgomeryMultiply intrinsic

Fri, 04 Mar 2016 16:15:48 +0300

author
vkempik
date
Fri, 04 Mar 2016 16:15:48 +0300
changeset 8490
5601e440e5e7
parent 8489
51c505229e71
child 8491
4abc54f62213

8130150: Implement BigInteger.montgomeryMultiply intrinsic
Reviewed-by: kvn, mdoerr

src/cpu/x86/vm/sharedRuntime_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/stubGenerator_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/share/vm/classfile/vmSymbols.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/c2_globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/escape.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/sharedRuntime.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/stubRoutines.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/stubRoutines.hpp file | annotate | diff | comparison | revisions
test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Feb 17 13:40:12 2016 +0300
     1.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Mar 04 16:15:48 2016 +0300
     1.3 @@ -23,6 +23,9 @@
     1.4   */
     1.5  
     1.6  #include "precompiled.hpp"
     1.7 +#ifndef _WINDOWS
     1.8 +#include "alloca.h"
     1.9 +#endif
    1.10  #include "asm/macroAssembler.hpp"
    1.11  #include "asm/macroAssembler.inline.hpp"
    1.12  #include "code/debugInfoRec.hpp"
    1.13 @@ -3966,6 +3969,256 @@
    1.14  }
    1.15  
    1.16  
    1.17 +//------------------------------Montgomery multiplication------------------------
    1.18 +//
    1.19 +
    1.20 +#ifndef _WINDOWS
    1.21 +
    1.22 +#define ASM_SUBTRACT
    1.23 +
    1.24 +#ifdef ASM_SUBTRACT
    1.25 +// Subtract 0:b from carry:a.  Return carry.
    1.26 +static unsigned long
    1.27 +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
    1.28 +  long i = 0, cnt = len;
    1.29 +  unsigned long tmp;
    1.30 +  asm volatile("clc; "
    1.31 +               "0: ; "
    1.32 +               "mov (%[b], %[i], 8), %[tmp]; "
    1.33 +               "sbb %[tmp], (%[a], %[i], 8); "
    1.34 +               "inc %[i]; dec %[cnt]; "
    1.35 +               "jne 0b; "
    1.36 +               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
    1.37 +               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
    1.38 +               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
    1.39 +               : "memory");
    1.40 +  return tmp;
    1.41 +}
    1.42 +#else // ASM_SUBTRACT
    1.43 +typedef int __attribute__((mode(TI))) int128;
    1.44 +
    1.45 +// Subtract 0:b from carry:a.  Return carry.
    1.46 +static unsigned long
    1.47 +sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
    1.48 +  int128 tmp = 0;
    1.49 +  int i;
    1.50 +  for (i = 0; i < len; i++) {
    1.51 +    tmp += a[i];
    1.52 +    tmp -= b[i];
    1.53 +    a[i] = tmp;
    1.54 +    tmp >>= 64;
    1.55 +    assert(-1 <= tmp && tmp <= 0, "invariant");
    1.56 +  }
    1.57 +  return tmp + carry;
    1.58 +}
    1.59 +#endif // ! ASM_SUBTRACT
    1.60 +
    1.61 +// Multiply (unsigned) Long A by Long B, accumulating the double-
    1.62 +// length result into the accumulator formed of T0, T1, and T2.
    1.63 +#define MACC(A, B, T0, T1, T2)                                      \
    1.64 +do {                                                                \
    1.65 +  unsigned long hi, lo;                                             \
    1.66 +  asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
    1.67 +           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)      \
    1.68 +           : "r"(A), "a"(B) : "cc");                                \
    1.69 + } while(0)
    1.70 +
    1.71 +// As above, but add twice the double-length result into the
    1.72 +// accumulator.
    1.73 +#define MACC2(A, B, T0, T1, T2)                                     \
    1.74 +do {                                                                \
    1.75 +  unsigned long hi, lo;                                             \
    1.76 +  asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;"  \
    1.77 +           "add %%rax, %2; adc %%rdx, %3; adc $0, %4"               \
    1.78 +           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)      \
    1.79 +           : "r"(A), "a"(B) : "cc");                                \
    1.80 + } while(0)
    1.81 +
    1.82 +// Fast Montgomery multiplication.  The derivation of the algorithm is
    1.83 +// in  A Cryptographic Library for the Motorola DSP56000,
    1.84 +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
    1.85 +
    1.86 +static void __attribute__((noinline))
    1.87 +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
    1.88 +                    unsigned long m[], unsigned long inv, int len) {
    1.89 +  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
    1.90 +  int i;
    1.91 +
    1.92 +  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
    1.93 +
    1.94 +  for (i = 0; i < len; i++) {
    1.95 +    int j;
    1.96 +    for (j = 0; j < i; j++) {
    1.97 +      MACC(a[j], b[i-j], t0, t1, t2);
    1.98 +      MACC(m[j], n[i-j], t0, t1, t2);
    1.99 +    }
   1.100 +    MACC(a[i], b[0], t0, t1, t2);
   1.101 +    m[i] = t0 * inv;
   1.102 +    MACC(m[i], n[0], t0, t1, t2);
   1.103 +
   1.104 +    assert(t0 == 0, "broken Montgomery multiply");
   1.105 +
   1.106 +    t0 = t1; t1 = t2; t2 = 0;
   1.107 +  }
   1.108 +
   1.109 +  for (i = len; i < 2*len; i++) {
   1.110 +    int j;
   1.111 +    for (j = i-len+1; j < len; j++) {
   1.112 +      MACC(a[j], b[i-j], t0, t1, t2);
   1.113 +      MACC(m[j], n[i-j], t0, t1, t2);
   1.114 +    }
   1.115 +    m[i-len] = t0;
   1.116 +    t0 = t1; t1 = t2; t2 = 0;
   1.117 +  }
   1.118 +
   1.119 +  while (t0)
   1.120 +    t0 = sub(m, n, t0, len);
   1.121 +}
   1.122 +
   1.123 +// Fast Montgomery squaring.  This uses asymptotically 25% fewer
   1.124 +// multiplies so it should be up to 25% faster than Montgomery
   1.125 +// multiplication.  However, its loop control is more complex and it
   1.126 +// may actually run slower on some machines.
   1.127 +
   1.128 +static void __attribute__((noinline))
   1.129 +montgomery_square(unsigned long a[], unsigned long n[],
   1.130 +                  unsigned long m[], unsigned long inv, int len) {
   1.131 +  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
   1.132 +  int i;
   1.133 +
   1.134 +  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
   1.135 +
   1.136 +  for (i = 0; i < len; i++) {
   1.137 +    int j;
   1.138 +    int end = (i+1)/2;
   1.139 +    for (j = 0; j < end; j++) {
   1.140 +      MACC2(a[j], a[i-j], t0, t1, t2);
   1.141 +      MACC(m[j], n[i-j], t0, t1, t2);
   1.142 +    }
   1.143 +    if ((i & 1) == 0) {
   1.144 +      MACC(a[j], a[j], t0, t1, t2);
   1.145 +    }
   1.146 +    for (; j < i; j++) {
   1.147 +      MACC(m[j], n[i-j], t0, t1, t2);
   1.148 +    }
   1.149 +    m[i] = t0 * inv;
   1.150 +    MACC(m[i], n[0], t0, t1, t2);
   1.151 +
   1.152 +    assert(t0 == 0, "broken Montgomery square");
   1.153 +
   1.154 +    t0 = t1; t1 = t2; t2 = 0;
   1.155 +  }
   1.156 +
   1.157 +  for (i = len; i < 2*len; i++) {
   1.158 +    int start = i-len+1;
   1.159 +    int end = start + (len - start)/2;
   1.160 +    int j;
   1.161 +    for (j = start; j < end; j++) {
   1.162 +      MACC2(a[j], a[i-j], t0, t1, t2);
   1.163 +      MACC(m[j], n[i-j], t0, t1, t2);
   1.164 +    }
   1.165 +    if ((i & 1) == 0) {
   1.166 +      MACC(a[j], a[j], t0, t1, t2);
   1.167 +    }
   1.168 +    for (; j < len; j++) {
   1.169 +      MACC(m[j], n[i-j], t0, t1, t2);
   1.170 +    }
   1.171 +    m[i-len] = t0;
   1.172 +    t0 = t1; t1 = t2; t2 = 0;
   1.173 +  }
   1.174 +
   1.175 +  while (t0)
   1.176 +    t0 = sub(m, n, t0, len);
   1.177 +}
   1.178 +
   1.179 +// Swap words in a longword.
   1.180 +static unsigned long swap(unsigned long x) {
   1.181 +  return (x << 32) | (x >> 32);
   1.182 +}
   1.183 +
   1.184 +// Copy len longwords from s to d, word-swapping as we go.  The
   1.185 +// destination array is reversed.
   1.186 +static void reverse_words(unsigned long *s, unsigned long *d, int len) {
   1.187 +  d += len;
   1.188 +  while(len-- > 0) {
   1.189 +    d--;
   1.190 +    *d = swap(*s);
   1.191 +    s++;
   1.192 +  }
   1.193 +}
   1.194 +
   1.195 +// The threshold at which squaring is advantageous was determined
   1.196 +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
   1.197 +#define MONTGOMERY_SQUARING_THRESHOLD 64
   1.198 +
   1.199 +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
   1.200 +                                        jint len, jlong inv,
   1.201 +                                        jint *m_ints) {
   1.202 +  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
   1.203 +  int longwords = len/2;
   1.204 +
   1.205 +  // Make very sure we don't use so much space that the stack might
   1.206 +  // overflow.  512 jints corresponds to an 16384-bit integer and
   1.207 +  // will use here a total of 8k bytes of stack space.
   1.208 +  int total_allocation = longwords * sizeof (unsigned long) * 4;
   1.209 +  guarantee(total_allocation <= 8192, "must be");
   1.210 +  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
   1.211 +
   1.212 +  // Local scratch arrays
   1.213 +  unsigned long
   1.214 +    *a = scratch + 0 * longwords,
   1.215 +    *b = scratch + 1 * longwords,
   1.216 +    *n = scratch + 2 * longwords,
   1.217 +    *m = scratch + 3 * longwords;
   1.218 +
   1.219 +  reverse_words((unsigned long *)a_ints, a, longwords);
   1.220 +  reverse_words((unsigned long *)b_ints, b, longwords);
   1.221 +  reverse_words((unsigned long *)n_ints, n, longwords);
   1.222 +
   1.223 +  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
   1.224 +
   1.225 +  reverse_words(m, (unsigned long *)m_ints, longwords);
   1.226 +}
   1.227 +
   1.228 +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
   1.229 +                                      jint len, jlong inv,
   1.230 +                                      jint *m_ints) {
   1.231 +  assert(len % 2 == 0, "array length in montgomery_square must be even");
   1.232 +  int longwords = len/2;
   1.233 +
   1.234 +  // Make very sure we don't use so much space that the stack might
   1.235 +  // overflow.  512 jints corresponds to an 16384-bit integer and
   1.236 +  // will use here a total of 6k bytes of stack space.
   1.237 +  int total_allocation = longwords * sizeof (unsigned long) * 3;
   1.238 +  guarantee(total_allocation <= 8192, "must be");
   1.239 +  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
   1.240 +
   1.241 +  // Local scratch arrays
   1.242 +  unsigned long
   1.243 +    *a = scratch + 0 * longwords,
   1.244 +    *n = scratch + 1 * longwords,
   1.245 +    *m = scratch + 2 * longwords;
   1.246 +
   1.247 +  reverse_words((unsigned long *)a_ints, a, longwords);
   1.248 +  reverse_words((unsigned long *)n_ints, n, longwords);
   1.249 +
   1.250 +  //montgomery_square fails to pass BigIntegerTest on solaris amd64
   1.251 +  //on jdk7 and jdk8.
   1.252 +#ifndef SOLARIS
   1.253 +  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
   1.254 +#else
   1.255 +  if (0) {
   1.256 +#endif
   1.257 +    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
   1.258 +  } else {
   1.259 +    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
   1.260 +  }
   1.261 +
   1.262 +  reverse_words(m, (unsigned long *)m_ints, longwords);
   1.263 +}
   1.264 +
   1.265 +#endif // WINDOWS
   1.266 +
   1.267  #ifdef COMPILER2
   1.268  // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
   1.269  //
     2.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Feb 17 13:40:12 2016 +0300
     2.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Mar 04 16:15:48 2016 +0300
     2.3 @@ -4094,7 +4094,18 @@
     2.4      if (UseMulAddIntrinsic) {
     2.5        StubRoutines::_mulAdd = generate_mulAdd();
     2.6      }
     2.7 -#endif
     2.8 +
     2.9 +#ifndef _WINDOWS
    2.10 +    if (UseMontgomeryMultiplyIntrinsic) {
    2.11 +      StubRoutines::_montgomeryMultiply
    2.12 +        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    2.13 +    }
    2.14 +    if (UseMontgomerySquareIntrinsic) {
    2.15 +      StubRoutines::_montgomerySquare
    2.16 +        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    2.17 +    }
    2.18 +#endif // WINDOWS
    2.19 +#endif // COMPILER2
    2.20    }
    2.21  
    2.22   public:
     3.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Feb 17 13:40:12 2016 +0300
     3.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 04 16:15:48 2016 +0300
     3.3 @@ -709,6 +709,12 @@
     3.4    if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
     3.5      UseMulAddIntrinsic = true;
     3.6    }
     3.7 +  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
     3.8 +    UseMontgomeryMultiplyIntrinsic = true;
     3.9 +  }
    3.10 +  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    3.11 +    UseMontgomerySquareIntrinsic = true;
    3.12 +  }
    3.13  #else
    3.14    if (UseMultiplyToLenIntrinsic) {
    3.15      if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    3.16 @@ -728,6 +734,18 @@
    3.17      }
    3.18      FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
    3.19    }
    3.20 +  if (UseMontgomeryMultiplyIntrinsic) {
    3.21 +    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    3.22 +      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    3.23 +    }
    3.24 +    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
    3.25 +  }
    3.26 +  if (UseMontgomerySquareIntrinsic) {
    3.27 +    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    3.28 +      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    3.29 +    }
    3.30 +    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
    3.31 +  }
    3.32  #endif
    3.33  #endif // COMPILER2
    3.34  
     4.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Wed Feb 17 13:40:12 2016 +0300
     4.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Mar 04 16:15:48 2016 +0300
     4.3 @@ -789,7 +789,7 @@
     4.4     do_signature(encodeISOArray_signature,                        "([CI[BII)I")                                          \
     4.5                                                                                                                          \
     4.6    do_class(java_math_BigInteger,                      "java/math/BigInteger")                                           \
     4.7 -  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R)             \
     4.8 +  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S)             \
     4.9     do_name(     multiplyToLen_name,                             "multiplyToLen")                                        \
    4.10     do_signature(multiplyToLen_signature,                        "([II[II[I)[I")                                         \
    4.11                                                                                                                          \
    4.12 @@ -801,6 +801,14 @@
    4.13     do_name(     mulAdd_name,                                  "implMulAdd")                                             \
    4.14     do_signature(mulAdd_signature,                             "([I[IIII)I")                                             \
    4.15                                                                                                                          \
    4.16 +  do_intrinsic(_montgomeryMultiply,      java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \
    4.17 +   do_name(     montgomeryMultiply_name,                             "implMontgomeryMultiply")                          \
    4.18 +   do_signature(montgomeryMultiply_signature,                        "([I[I[IIJ[I)[I")                                  \
    4.19 +                                                                                                                        \
    4.20 +  do_intrinsic(_montgomerySquare,      java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S)    \
    4.21 +   do_name(     montgomerySquare_name,                             "implMontgomerySquare")                              \
    4.22 +   do_signature(montgomerySquare_signature,                        "([I[IIJ[I)[I")                                      \
    4.23 +                                                                                                                        \
    4.24    /* java/lang/ref/Reference */                                                                                         \
    4.25    do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
    4.26                                                                                                                          \
     5.1 --- a/src/share/vm/opto/c2_globals.hpp	Wed Feb 17 13:40:12 2016 +0300
     5.2 +++ b/src/share/vm/opto/c2_globals.hpp	Fri Mar 04 16:15:48 2016 +0300
     5.3 @@ -665,6 +665,12 @@
     5.4    product(bool, UseMulAddIntrinsic, false,                                  \
     5.5            "Enables intrinsification of BigInteger.mulAdd()")                \
     5.6                                                                              \
     5.7 +  product(bool, UseMontgomeryMultiplyIntrinsic, false,                      \
     5.8 +          "Enables intrinsification of BigInteger.montgomeryMultiply()")    \
     5.9 +                                                                            \
    5.10 +  product(bool, UseMontgomerySquareIntrinsic, false,                        \
    5.11 +          "Enables intrinsification of BigInteger.montgomerySquare()")      \
    5.12 +                                                                            \
    5.13    product(bool, UseTypeSpeculation, true,                                   \
    5.14            "Speculatively propagate types from profiles")                    \
    5.15                                                                              \
     6.1 --- a/src/share/vm/opto/escape.cpp	Wed Feb 17 13:40:12 2016 +0300
     6.2 +++ b/src/share/vm/opto/escape.cpp	Fri Mar 04 16:15:48 2016 +0300
     6.3 @@ -960,8 +960,10 @@
     6.4                    strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
     6.5                    strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
     6.6                    strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
     6.7 -                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0)
     6.8 -                  ))) {
     6.9 +                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
    6.10 +                  strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
    6.11 +                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
    6.12 +                 ))) {
    6.13              call->dump();
    6.14              fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
    6.15            }
     7.1 --- a/src/share/vm/opto/library_call.cpp	Wed Feb 17 13:40:12 2016 +0300
     7.2 +++ b/src/share/vm/opto/library_call.cpp	Fri Mar 04 16:15:48 2016 +0300
     7.3 @@ -326,6 +326,8 @@
     7.4    bool inline_multiplyToLen();
     7.5    bool inline_squareToLen();
     7.6    bool inline_mulAdd();
     7.7 +  bool inline_montgomeryMultiply();
     7.8 +  bool inline_montgomerySquare();
     7.9  
    7.10    bool inline_profileBoolean();
    7.11  };
    7.12 @@ -537,6 +539,13 @@
    7.13      if (!UseMulAddIntrinsic) return NULL;
    7.14      break;
    7.15  
    7.16 +  case vmIntrinsics::_montgomeryMultiply:
    7.17 +     if (!UseMontgomeryMultiplyIntrinsic) return NULL;
    7.18 +    break;
    7.19 +  case vmIntrinsics::_montgomerySquare:
    7.20 +     if (!UseMontgomerySquareIntrinsic) return NULL;
    7.21 +    break;
    7.22 +
    7.23    case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
    7.24    case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    7.25      if (!UseAESIntrinsics) return NULL;
    7.26 @@ -943,6 +952,11 @@
    7.27    case vmIntrinsics::_mulAdd:
    7.28      return inline_mulAdd();
    7.29  
    7.30 +  case vmIntrinsics::_montgomeryMultiply:
    7.31 +    return inline_montgomeryMultiply();
    7.32 +  case vmIntrinsics::_montgomerySquare:
    7.33 +    return inline_montgomerySquare();
    7.34 +
    7.35    case vmIntrinsics::_encodeISOArray:
    7.36      return inline_encodeISOArray();
    7.37  
    7.38 @@ -5783,11 +5797,12 @@
    7.39  
    7.40    assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters");
    7.41  
    7.42 -  Node* x    = argument(1);
    7.43 -  Node* xlen = argument(2);
    7.44 -  Node* y    = argument(3);
    7.45 -  Node* ylen = argument(4);
    7.46 -  Node* z    = argument(5);
    7.47 +  // no receiver because it is a static method
    7.48 +  Node* x    = argument(0);
    7.49 +  Node* xlen = argument(1);
    7.50 +  Node* y    = argument(2);
    7.51 +  Node* ylen = argument(3);
    7.52 +  Node* z    = argument(4);
    7.53  
    7.54    const Type* x_type = x->Value(&_gvn);
    7.55    const Type* y_type = y->Value(&_gvn);
    7.56 @@ -5966,6 +5981,121 @@
    7.57    return true;
    7.58  }
    7.59  
    7.60 +//-------------inline_montgomeryMultiply-----------------------------------
    7.61 +bool LibraryCallKit::inline_montgomeryMultiply() {
    7.62 +  address stubAddr = StubRoutines::montgomeryMultiply();
    7.63 +  if (stubAddr == NULL) {
    7.64 +    return false; // Intrinsic's stub is not implemented on this platform
    7.65 +  }
    7.66 +
    7.67 +  assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
    7.68 +  const char* stubName = "montgomery_square";
    7.69 +
    7.70 +  assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
    7.71 +
    7.72 +  Node* a    = argument(0);
    7.73 +  Node* b    = argument(1);
    7.74 +  Node* n    = argument(2);
    7.75 +  Node* len  = argument(3);
    7.76 +  Node* inv  = argument(4);
    7.77 +  Node* m    = argument(6);
    7.78 +
    7.79 +  const Type* a_type = a->Value(&_gvn);
    7.80 +  const TypeAryPtr* top_a = a_type->isa_aryptr();
    7.81 +  const Type* b_type = b->Value(&_gvn);
    7.82 +  const TypeAryPtr* top_b = b_type->isa_aryptr();
    7.83 +  const Type* n_type = a->Value(&_gvn);
    7.84 +  const TypeAryPtr* top_n = n_type->isa_aryptr();
    7.85 +  const Type* m_type = a->Value(&_gvn);
    7.86 +  const TypeAryPtr* top_m = m_type->isa_aryptr();
    7.87 +  if (top_a  == NULL || top_a->klass()  == NULL ||
    7.88 +      top_b == NULL || top_b->klass()  == NULL ||
    7.89 +      top_n == NULL || top_n->klass()  == NULL ||
    7.90 +      top_m == NULL || top_m->klass()  == NULL) {
    7.91 +    // failed array check
    7.92 +    return false;
    7.93 +  }
    7.94 +
    7.95 +  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
    7.96 +  BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
    7.97 +  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
    7.98 +  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
    7.99 +  if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
   7.100 +    return false;
   7.101 +  }
   7.102 +
   7.103 +  // Make the call
   7.104 +  {
   7.105 +    Node* a_start = array_element_address(a, intcon(0), a_elem);
   7.106 +    Node* b_start = array_element_address(b, intcon(0), b_elem);
   7.107 +    Node* n_start = array_element_address(n, intcon(0), n_elem);
   7.108 +    Node* m_start = array_element_address(m, intcon(0), m_elem);
   7.109 +
   7.110 +    Node* call = make_runtime_call(RC_LEAF,
   7.111 +                                   OptoRuntime::montgomeryMultiply_Type(),
   7.112 +                                   stubAddr, stubName, TypePtr::BOTTOM,
   7.113 +                                   a_start, b_start, n_start, len, inv, top(),
   7.114 +                                   m_start);
   7.115 +    set_result(m);
   7.116 +  }
   7.117 +
   7.118 +  return true;
   7.119 +}
   7.120 +
   7.121 +bool LibraryCallKit::inline_montgomerySquare() {
   7.122 +  address stubAddr = StubRoutines::montgomerySquare();
   7.123 +  if (stubAddr == NULL) {
   7.124 +    return false; // Intrinsic's stub is not implemented on this platform
   7.125 +  }
   7.126 +
   7.127 +  assert(UseMontgomerySquareIntrinsic, "not implemented on this platform");
   7.128 +  const char* stubName = "montgomery_square";
   7.129 +
   7.130 +  assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters");
   7.131 +
   7.132 +  Node* a    = argument(0);
   7.133 +  Node* n    = argument(1);
   7.134 +  Node* len  = argument(2);
   7.135 +  Node* inv  = argument(3);
   7.136 +  Node* m    = argument(5);
   7.137 +
   7.138 +  const Type* a_type = a->Value(&_gvn);
   7.139 +  const TypeAryPtr* top_a = a_type->isa_aryptr();
   7.140 +  const Type* n_type = a->Value(&_gvn);
   7.141 +  const TypeAryPtr* top_n = n_type->isa_aryptr();
   7.142 +  const Type* m_type = a->Value(&_gvn);
   7.143 +  const TypeAryPtr* top_m = m_type->isa_aryptr();
   7.144 +  if (top_a  == NULL || top_a->klass()  == NULL ||
   7.145 +      top_n == NULL || top_n->klass()  == NULL ||
   7.146 +      top_m == NULL || top_m->klass()  == NULL) {
   7.147 +    // failed array check
   7.148 +    return false;
   7.149 +  }
   7.150 +
   7.151 +  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
   7.152 +  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
   7.153 +  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
   7.154 +  if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
   7.155 +    return false;
   7.156 +  }
   7.157 +
   7.158 +  // Make the call
   7.159 +  {
   7.160 +    Node* a_start = array_element_address(a, intcon(0), a_elem);
   7.161 +    Node* n_start = array_element_address(n, intcon(0), n_elem);
   7.162 +    Node* m_start = array_element_address(m, intcon(0), m_elem);
   7.163 +
   7.164 +    Node* call = make_runtime_call(RC_LEAF,
   7.165 +                                   OptoRuntime::montgomerySquare_Type(),
   7.166 +                                   stubAddr, stubName, TypePtr::BOTTOM,
   7.167 +                                   a_start, n_start, len, inv, top(),
   7.168 +                                   m_start);
   7.169 +    set_result(m);
   7.170 +  }
   7.171 +
   7.172 +  return true;
   7.173 +}
   7.174 +
   7.175  
   7.176  /**
   7.177   * Calculate CRC32 for byte.
     8.1 --- a/src/share/vm/opto/runtime.cpp	Wed Feb 17 13:40:12 2016 +0300
     8.2 +++ b/src/share/vm/opto/runtime.cpp	Fri Mar 04 16:15:48 2016 +0300
     8.3 @@ -998,6 +998,52 @@
     8.4    return TypeFunc::make(domain, range);
     8.5  }
     8.6  
     8.7 +const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
     8.8 +  // create input type (domain)
     8.9 +  int num_args      = 7;
    8.10 +  int argcnt = num_args;
    8.11 +  const Type** fields = TypeTuple::fields(argcnt);
    8.12 +  int argp = TypeFunc::Parms;
    8.13 +  fields[argp++] = TypePtr::NOTNULL;    // a
    8.14 +  fields[argp++] = TypePtr::NOTNULL;    // b
    8.15 +  fields[argp++] = TypePtr::NOTNULL;    // n
    8.16 +  fields[argp++] = TypeInt::INT;        // len
    8.17 +  fields[argp++] = TypeLong::LONG;      // inv
    8.18 +  fields[argp++] = Type::HALF;
    8.19 +  fields[argp++] = TypePtr::NOTNULL;    // result
    8.20 +  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
    8.21 +  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
    8.22 +
    8.23 +  // result type needed
    8.24 +  fields = TypeTuple::fields(1);
    8.25 +  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
    8.26 +
    8.27 +  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
    8.28 +  return TypeFunc::make(domain, range);
    8.29 +}
    8.30 +
    8.31 +const TypeFunc* OptoRuntime::montgomerySquare_Type() {
    8.32 +  // create input type (domain)
    8.33 +  int num_args      = 6;
    8.34 +  int argcnt = num_args;
    8.35 +  const Type** fields = TypeTuple::fields(argcnt);
    8.36 +  int argp = TypeFunc::Parms;
    8.37 +  fields[argp++] = TypePtr::NOTNULL;    // a
    8.38 +  fields[argp++] = TypePtr::NOTNULL;    // n
    8.39 +  fields[argp++] = TypeInt::INT;        // len
    8.40 +  fields[argp++] = TypeLong::LONG;      // inv
    8.41 +  fields[argp++] = Type::HALF;
    8.42 +  fields[argp++] = TypePtr::NOTNULL;    // result
    8.43 +  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
    8.44 +  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
    8.45 +
    8.46 +  // result type needed
    8.47 +  fields = TypeTuple::fields(1);
    8.48 +  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
    8.49 +
    8.50 +  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
    8.51 +  return TypeFunc::make(domain, range);
    8.52 +}
    8.53  
    8.54  
    8.55  //------------- Interpreter state access for on stack replacement
     9.1 --- a/src/share/vm/opto/runtime.hpp	Wed Feb 17 13:40:12 2016 +0300
     9.2 +++ b/src/share/vm/opto/runtime.hpp	Fri Mar 04 16:15:48 2016 +0300
     9.3 @@ -308,6 +308,8 @@
     9.4    static const TypeFunc* squareToLen_Type();
     9.5  
     9.6    static const TypeFunc* mulAdd_Type();
     9.7 +  static const TypeFunc* montgomeryMultiply_Type();
     9.8 +  static const TypeFunc* montgomerySquare_Type();
     9.9  
    9.10    static const TypeFunc* updateBytesCRC32_Type();
    9.11  
    10.1 --- a/src/share/vm/runtime/sharedRuntime.hpp	Wed Feb 17 13:40:12 2016 +0300
    10.2 +++ b/src/share/vm/runtime/sharedRuntime.hpp	Fri Mar 04 16:15:48 2016 +0300
    10.3 @@ -145,6 +145,12 @@
    10.4    static double dsqrt(double f);
    10.5  #endif
    10.6  
    10.7 +  // Montgomery multiplication
    10.8 +  static void montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
    10.9 +                                  jint len, jlong inv, jint *m_ints);
   10.10 +  static void montgomery_square(jint *a_ints, jint *n_ints,
   10.11 +                                jint len, jlong inv, jint *m_ints);
   10.12 +
   10.13  #ifdef __SOFTFP__
   10.14    // C++ compiler generates soft float instructions as well as passing
   10.15    // float and double in registers.
    11.1 --- a/src/share/vm/runtime/stubRoutines.cpp	Wed Feb 17 13:40:12 2016 +0300
    11.2 +++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Mar 04 16:15:48 2016 +0300
    11.3 @@ -138,6 +138,8 @@
    11.4  address StubRoutines::_multiplyToLen = NULL;
    11.5  address StubRoutines::_squareToLen = NULL;
    11.6  address StubRoutines::_mulAdd = NULL;
    11.7 +address StubRoutines::_montgomeryMultiply = NULL;
    11.8 +address StubRoutines::_montgomerySquare = NULL;
    11.9  
   11.10  double (* StubRoutines::_intrinsic_log   )(double) = NULL;
   11.11  double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
    12.1 --- a/src/share/vm/runtime/stubRoutines.hpp	Wed Feb 17 13:40:12 2016 +0300
    12.2 +++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Mar 04 16:15:48 2016 +0300
    12.3 @@ -211,6 +211,8 @@
    12.4    static address _multiplyToLen;
    12.5    static address _squareToLen;
    12.6    static address _mulAdd;
    12.7 +  static address _montgomeryMultiply;
    12.8 +  static address _montgomerySquare;
    12.9  
   12.10    // These are versions of the java.lang.Math methods which perform
   12.11    // the same operations as the intrinsic version.  They are used for
   12.12 @@ -371,6 +373,8 @@
   12.13    static address multiplyToLen()       {return _multiplyToLen; }
   12.14    static address squareToLen()         {return _squareToLen; }
   12.15    static address mulAdd()              {return _mulAdd; }
   12.16 +  static address montgomeryMultiply()  { return _montgomeryMultiply; }
   12.17 +  static address montgomerySquare()    { return _montgomerySquare; }
   12.18  
   12.19    static address select_fill_function(BasicType t, bool aligned, const char* &name);
   12.20  
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java	Fri Mar 04 16:15:48 2016 +0300
    13.3 @@ -0,0 +1,278 @@
    13.4 +//
    13.5 +// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
    13.6 +// Copyright (c) 2015, Red Hat Inc. All rights reserved.
    13.7 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    13.8 +//
    13.9 +// This code is free software; you can redistribute it and/or modify it
   13.10 +// under the terms of the GNU General Public License version 2 only, as
   13.11 +// published by the Free Software Foundation.
   13.12 +//
   13.13 +// This code is distributed in the hope that it will be useful, but WITHOUT
   13.14 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13.15 +// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13.16 +// version 2 for more details (a copy is included in the LICENSE file that
   13.17 +// accompanied this code).
   13.18 +//
   13.19 +// You should have received a copy of the GNU General Public License version
   13.20 +// 2 along with this work; if not, write to the Free Software Foundation,
   13.21 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   13.22 +//
   13.23 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   13.24 +// or visit www.oracle.com if you need additional information or have any
   13.25 +// questions.
   13.26 +//
   13.27 +//
   13.28 +
   13.29 +import java.lang.invoke.MethodHandle;
   13.30 +import java.lang.invoke.MethodHandles;
   13.31 +import java.lang.invoke.MethodType;
   13.32 +import java.lang.reflect.Constructor;
   13.33 +import java.lang.reflect.Field;
   13.34 +import java.lang.reflect.Method;
   13.35 +import java.math.BigInteger;
   13.36 +import java.util.Arrays;
   13.37 +import java.util.Random;
   13.38 +
   13.39 +/**
   13.40 + * @test
   13.41 + * @bug 8130150
   13.42 + * @library /testlibrary
   13.43 + * @requires (os.simpleArch == "x64") & (os.family != "windows")
   13.44 + * @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments.
   13.45 + */
   13.46 +
   13.47 +public class MontgomeryMultiplyTest {
   13.48 +
   13.49 +    static final MethodHandles.Lookup lookup = MethodHandles.lookup();
   13.50 +
   13.51 +    static final MethodHandle montgomeryMultiplyHandle, montgomerySquareHandle;
   13.52 +    static final MethodHandle bigIntegerConstructorHandle;
   13.53 +    static final Field bigIntegerMagField;
   13.54 +
   13.55 +    static {
   13.56 +       // Use reflection to gain access to the methods we want to test.
   13.57 +        try {
   13.58 +            Method m = BigInteger.class.getDeclaredMethod("montgomeryMultiply",
   13.59 +                /*a*/int[].class, /*b*/int[].class, /*n*/int[].class, /*len*/int.class,
   13.60 +                /*inv*/long.class, /*product*/int[].class);
   13.61 +            m.setAccessible(true);
   13.62 +            montgomeryMultiplyHandle = lookup.unreflect(m);
   13.63 +
   13.64 +            m = BigInteger.class.getDeclaredMethod("montgomerySquare",
   13.65 +                /*a*/int[].class, /*n*/int[].class, /*len*/int.class,
   13.66 +                /*inv*/long.class, /*product*/int[].class);
   13.67 +            m.setAccessible(true);
   13.68 +            montgomerySquareHandle = lookup.unreflect(m);
   13.69 +
   13.70 +            Constructor c
   13.71 +                = BigInteger.class.getDeclaredConstructor(int.class, int[].class);
   13.72 +            c.setAccessible(true);
   13.73 +            bigIntegerConstructorHandle = lookup.unreflectConstructor(c);
   13.74 +
   13.75 +            bigIntegerMagField = BigInteger.class.getDeclaredField("mag");
   13.76 +            bigIntegerMagField.setAccessible(true);
   13.77 +
   13.78 +        } catch (Throwable ex) {
   13.79 +            throw new RuntimeException(ex);
   13.80 +        }
   13.81 +    }
   13.82 +
   13.83 +    // Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare.
   13.84 +    int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv,
   13.85 +                             int[] product) throws Throwable {
   13.86 +        int[] result =
   13.87 +            (a == b) ? (int[]) montgomerySquareHandle.invokeExact(a, n, len, inv, product)
   13.88 +                     : (int[]) montgomeryMultiplyHandle.invokeExact(a, b, n, len, inv, product);
   13.89 +        return Arrays.copyOf(result, len);
   13.90 +    }
   13.91 +
   13.92 +    // Invoke the private constructor BigInteger(int[]).
   13.93 +    BigInteger newBigInteger(int[] val) throws Throwable {
   13.94 +        return (BigInteger) bigIntegerConstructorHandle.invokeExact(1, val);
   13.95 +    }
   13.96 +
   13.97 +    // Get the private field BigInteger.mag
   13.98 +    int[] mag(BigInteger n) {
   13.99 +        try {
  13.100 +            return (int[]) bigIntegerMagField.get(n);
  13.101 +        } catch (Exception ex) {
  13.102 +            throw new RuntimeException(ex);
  13.103 +        }
  13.104 +    }
  13.105 +
  13.106 +    // Montgomery multiplication
  13.107 +    // Calculate a * b * r^-1 mod n)
  13.108 +    //
  13.109 +    // R is a power of the word size
  13.110 +    // N' = R^-1 mod N
  13.111 +    //
  13.112 +    // T := ab
  13.113 +    // m := (T mod R)N' mod R [so 0 <= m < R]
  13.114 +    // t := (T + mN)/R
  13.115 +    // if t >= N then return t - N else return t
  13.116 +    //
  13.117 +    BigInteger montgomeryMultiply(BigInteger a, BigInteger b, BigInteger N,
  13.118 +            int len, BigInteger n_prime)
  13.119 +            throws Throwable {
  13.120 +        BigInteger T = a.multiply(b);
  13.121 +        BigInteger R = BigInteger.ONE.shiftLeft(len*32);
  13.122 +        BigInteger mask = R.subtract(BigInteger.ONE);
  13.123 +        BigInteger m = (T.and(mask)).multiply(n_prime);
  13.124 +        m = m.and(mask); // i.e. m.mod(R)
  13.125 +        T = T.add(m.multiply(N));
  13.126 +        T = T.shiftRight(len*32); // i.e. T.divide(R)
  13.127 +        if (T.compareTo(N) > 0) {
  13.128 +            T = T.subtract(N);
  13.129 +        }
  13.130 +        return T;
  13.131 +    }
  13.132 +
  13.133 +    // Call the Montgomery multiply intrinsic.
  13.134 +    BigInteger montgomeryMultiply(int[] a_words, int[] b_words, int[] n_words,
  13.135 +            int len, BigInteger inv)
  13.136 +            throws Throwable {
  13.137 +        BigInteger t = montgomeryMultiply(
  13.138 +                newBigInteger(a_words),
  13.139 +                newBigInteger(b_words),
  13.140 +                newBigInteger(n_words),
  13.141 +                len, inv);
  13.142 +        return t;
  13.143 +    }
  13.144 +
  13.145 +    // Check that the Montgomery multiply intrinsic returns the same
  13.146 +    // result as the longhand calculation.
  13.147 +    void check(int[] a_words, int[] b_words, int[] n_words, int len, BigInteger inv)
  13.148 +            throws Throwable {
  13.149 +        BigInteger n = newBigInteger(n_words);
  13.150 +        BigInteger slow = montgomeryMultiply(a_words, b_words, n_words, len, inv);
  13.151 +        BigInteger fast
  13.152 +            = newBigInteger(montgomeryMultiply
  13.153 +                            (a_words, b_words, n_words, len, inv.longValue(), null));
  13.154 +        // The intrinsic may not return the same value as the longhand
  13.155 +        // calculation but they must have the same residue mod N.
  13.156 +        if (!slow.mod(n).equals(fast.mod(n))) {
  13.157 +            throw new RuntimeException();
  13.158 +        }
  13.159 +    }
  13.160 +
  13.161 +    Random rnd = new Random(0);
  13.162 +
  13.163 +    // Return a random value of length <= bits in an array of even length
  13.164 +    int[] random_val(int bits) {
  13.165 +        int len = (bits+63)/64;  // i.e. length in longs
  13.166 +        int[] val = new int[len*2];
  13.167 +        for (int i = 0; i < val.length; i++)
  13.168 +            val[i] = rnd.nextInt();
  13.169 +        int leadingZeros = 64 - (bits & 64);
  13.170 +        if (leadingZeros >= 32) {
  13.171 +            val[0] = 0;
  13.172 +            val[1] &= ~(-1l << (leadingZeros & 31));
  13.173 +        } else {
  13.174 +            val[0] &= ~(-1l << leadingZeros);
  13.175 +        }
  13.176 +        return val;
  13.177 +    }
  13.178 +
  13.179 +    void testOneLength(int lenInBits, int lenInInts) throws Throwable {
  13.180 +        BigInteger mod = new BigInteger(lenInBits, 2, rnd);
  13.181 +        BigInteger r = BigInteger.ONE.shiftLeft(lenInInts * 32);
  13.182 +        BigInteger n_prime = mod.modInverse(r).negate();
  13.183 +
  13.184 +        // Make n.length even, padding with a zero if necessary
  13.185 +        int[] n = mag(mod);
  13.186 +        if (n.length < lenInInts) {
  13.187 +            int[] x = new int[lenInInts];
  13.188 +            System.arraycopy(n, 0, x, lenInInts-n.length, n.length);
  13.189 +            n = x;
  13.190 +        }
  13.191 +
  13.192 +        for (int i = 0; i < 10000; i++) {
  13.193 +            // multiply
  13.194 +            check(random_val(lenInBits), random_val(lenInBits), n, lenInInts, n_prime);
  13.195 +            // square
  13.196 +            int[] tmp = random_val(lenInBits);
  13.197 +            check(tmp, tmp, n, lenInInts, n_prime);
  13.198 +        }
  13.199 +    }
  13.200 +
  13.201 +    // Test the Montgomery multiply intrinsic with a bunch of random
  13.202 +    // values of varying lengths.  Do this for long enough that the
  13.203 +    // caller of the intrinsic is C2-compiled.
  13.204 +    void testResultValues() throws Throwable {
  13.205 +        // Test a couple of interesting edge cases.
  13.206 +        testOneLength(1024, 32);
  13.207 +        testOneLength(1025, 34);
  13.208 +        for (int j = 10; j > 0; j--) {
  13.209 +            // Construct a random prime whose length in words is even
  13.210 +            int lenInBits = rnd.nextInt(2048) + 64;
  13.211 +            int lenInInts = (lenInBits + 63)/64*2;
  13.212 +            testOneLength(lenInBits, lenInInts);
  13.213 +        }
  13.214 +    }
  13.215 +
  13.216 +    // Range checks
  13.217 +    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, int[] n, int len, long inv,
  13.218 +                                        int[] product, Class klass) {
  13.219 +        try {
  13.220 +            montgomeryMultiply(a, b, n, len, inv, product);
  13.221 +        } catch (Throwable ex) {
  13.222 +            if (klass.isAssignableFrom(ex.getClass()))
  13.223 +                return;
  13.224 +            throw new RuntimeException(klass + " expected, " + ex + " was thrown");
  13.225 +        }
  13.226 +        throw new RuntimeException(klass + " expected, was not thrown");
  13.227 +    }
  13.228 +
  13.229 +    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
  13.230 +            Class klass) {
  13.231 +        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), null, klass);
  13.232 +    }
  13.233 +
  13.234 +    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
  13.235 +            int[] product, Class klass) {
  13.236 +        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), product, klass);
  13.237 +    }
  13.238 +
  13.239 +    void testMontgomeryMultiplyChecks() {
  13.240 +        int[] blah = random_val(40);
  13.241 +        int[] small = random_val(39);
  13.242 +        BigInteger mod = new BigInteger(40*32 , 2, rnd);
  13.243 +        BigInteger r = BigInteger.ONE.shiftLeft(40*32);
  13.244 +        BigInteger n_prime = mod.modInverse(r).negate();
  13.245 +
  13.246 +        // Length out of range: square
  13.247 +        testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class);
  13.248 +        testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class);
  13.249 +        testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class);
  13.250 +        // As above, but for multiply
  13.251 +        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class);
  13.252 +        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
  13.253 +        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
  13.254 +
  13.255 +        // Length odd
  13.256 +        testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class);
  13.257 +        testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class);
  13.258 +        testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class);
  13.259 +        // As above, but for multiply
  13.260 +        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class);
  13.261 +        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class);
  13.262 +        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class);
  13.263 +
  13.264 +        // array too small
  13.265 +        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
  13.266 +        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class);
  13.267 +        testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class);
  13.268 +        testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
  13.269 +        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
  13.270 +        testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
  13.271 +    }
  13.272 +
  13.273 +    public static void main(String args[]) {
  13.274 +        try {
  13.275 +            new MontgomeryMultiplyTest().testMontgomeryMultiplyChecks();
  13.276 +            new MontgomeryMultiplyTest().testResultValues();
  13.277 +        } catch (Throwable ex) {
  13.278 +            throw new RuntimeException(ex);
  13.279 +        }
  13.280 +     }
  13.281 +}

mercurial