Fri, 04 Mar 2016 16:15:48 +0300
8130150: Implement BigInteger.montgomeryMultiply intrinsic
Reviewed-by: kvn, mdoerr
1.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Feb 17 13:40:12 2016 +0300 1.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Fri Mar 04 16:15:48 2016 +0300 1.3 @@ -23,6 +23,9 @@ 1.4 */ 1.5 1.6 #include "precompiled.hpp" 1.7 +#ifndef _WINDOWS 1.8 +#include "alloca.h" 1.9 +#endif 1.10 #include "asm/macroAssembler.hpp" 1.11 #include "asm/macroAssembler.inline.hpp" 1.12 #include "code/debugInfoRec.hpp" 1.13 @@ -3966,6 +3969,256 @@ 1.14 } 1.15 1.16 1.17 +//------------------------------Montgomery multiplication------------------------ 1.18 +// 1.19 + 1.20 +#ifndef _WINDOWS 1.21 + 1.22 +#define ASM_SUBTRACT 1.23 + 1.24 +#ifdef ASM_SUBTRACT 1.25 +// Subtract 0:b from carry:a. Return carry. 1.26 +static unsigned long 1.27 +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { 1.28 + long i = 0, cnt = len; 1.29 + unsigned long tmp; 1.30 + asm volatile("clc; " 1.31 + "0: ; " 1.32 + "mov (%[b], %[i], 8), %[tmp]; " 1.33 + "sbb %[tmp], (%[a], %[i], 8); " 1.34 + "inc %[i]; dec %[cnt]; " 1.35 + "jne 0b; " 1.36 + "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 1.37 + : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 1.38 + : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 1.39 + : "memory"); 1.40 + return tmp; 1.41 +} 1.42 +#else // ASM_SUBTRACT 1.43 +typedef int __attribute__((mode(TI))) int128; 1.44 + 1.45 +// Subtract 0:b from carry:a. Return carry. 1.46 +static unsigned long 1.47 +sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) { 1.48 + int128 tmp = 0; 1.49 + int i; 1.50 + for (i = 0; i < len; i++) { 1.51 + tmp += a[i]; 1.52 + tmp -= b[i]; 1.53 + a[i] = tmp; 1.54 + tmp >>= 64; 1.55 + assert(-1 <= tmp && tmp <= 0, "invariant"); 1.56 + } 1.57 + return tmp + carry; 1.58 +} 1.59 +#endif // ! ASM_SUBTRACT 1.60 + 1.61 +// Multiply (unsigned) Long A by Long B, accumulating the double- 1.62 +// length result into the accumulator formed of T0, T1, and T2. 1.63 +#define MACC(A, B, T0, T1, T2) \ 1.64 +do { \ 1.65 + unsigned long hi, lo; \ 1.66 + asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 1.67 + : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 1.68 + : "r"(A), "a"(B) : "cc"); \ 1.69 + } while(0) 1.70 + 1.71 +// As above, but add twice the double-length result into the 1.72 +// accumulator. 1.73 +#define MACC2(A, B, T0, T1, T2) \ 1.74 +do { \ 1.75 + unsigned long hi, lo; \ 1.76 + asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;" \ 1.77 + "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 1.78 + : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 1.79 + : "r"(A), "a"(B) : "cc"); \ 1.80 + } while(0) 1.81 + 1.82 +// Fast Montgomery multiplication. The derivation of the algorithm is 1.83 +// in A Cryptographic Library for the Motorola DSP56000, 1.84 +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 1.85 + 1.86 +static void __attribute__((noinline)) 1.87 +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], 1.88 + unsigned long m[], unsigned long inv, int len) { 1.89 + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 1.90 + int i; 1.91 + 1.92 + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 1.93 + 1.94 + for (i = 0; i < len; i++) { 1.95 + int j; 1.96 + for (j = 0; j < i; j++) { 1.97 + MACC(a[j], b[i-j], t0, t1, t2); 1.98 + MACC(m[j], n[i-j], t0, t1, t2); 1.99 + } 1.100 + MACC(a[i], b[0], t0, t1, t2); 1.101 + m[i] = t0 * inv; 1.102 + MACC(m[i], n[0], t0, t1, t2); 1.103 + 1.104 + assert(t0 == 0, "broken Montgomery multiply"); 1.105 + 1.106 + t0 = t1; t1 = t2; t2 = 0; 1.107 + } 1.108 + 1.109 + for (i = len; i < 2*len; i++) { 1.110 + int j; 1.111 + for (j = i-len+1; j < len; j++) { 1.112 + MACC(a[j], b[i-j], t0, t1, t2); 1.113 + MACC(m[j], n[i-j], t0, t1, t2); 1.114 + } 1.115 + m[i-len] = t0; 1.116 + t0 = t1; t1 = t2; t2 = 0; 1.117 + } 1.118 + 1.119 + while (t0) 1.120 + t0 = sub(m, n, t0, len); 1.121 +} 1.122 + 1.123 +// Fast Montgomery squaring. This uses asymptotically 25% fewer 1.124 +// multiplies so it should be up to 25% faster than Montgomery 1.125 +// multiplication. However, its loop control is more complex and it 1.126 +// may actually run slower on some machines. 1.127 + 1.128 +static void __attribute__((noinline)) 1.129 +montgomery_square(unsigned long a[], unsigned long n[], 1.130 + unsigned long m[], unsigned long inv, int len) { 1.131 + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 1.132 + int i; 1.133 + 1.134 + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 1.135 + 1.136 + for (i = 0; i < len; i++) { 1.137 + int j; 1.138 + int end = (i+1)/2; 1.139 + for (j = 0; j < end; j++) { 1.140 + MACC2(a[j], a[i-j], t0, t1, t2); 1.141 + MACC(m[j], n[i-j], t0, t1, t2); 1.142 + } 1.143 + if ((i & 1) == 0) { 1.144 + MACC(a[j], a[j], t0, t1, t2); 1.145 + } 1.146 + for (; j < i; j++) { 1.147 + MACC(m[j], n[i-j], t0, t1, t2); 1.148 + } 1.149 + m[i] = t0 * inv; 1.150 + MACC(m[i], n[0], t0, t1, t2); 1.151 + 1.152 + assert(t0 == 0, "broken Montgomery square"); 1.153 + 1.154 + t0 = t1; t1 = t2; t2 = 0; 1.155 + } 1.156 + 1.157 + for (i = len; i < 2*len; i++) { 1.158 + int start = i-len+1; 1.159 + int end = start + (len - start)/2; 1.160 + int j; 1.161 + for (j = start; j < end; j++) { 1.162 + MACC2(a[j], a[i-j], t0, t1, t2); 1.163 + MACC(m[j], n[i-j], t0, t1, t2); 1.164 + } 1.165 + if ((i & 1) == 0) { 1.166 + MACC(a[j], a[j], t0, t1, t2); 1.167 + } 1.168 + for (; j < len; j++) { 1.169 + MACC(m[j], n[i-j], t0, t1, t2); 1.170 + } 1.171 + m[i-len] = t0; 1.172 + t0 = t1; t1 = t2; t2 = 0; 1.173 + } 1.174 + 1.175 + while (t0) 1.176 + t0 = sub(m, n, t0, len); 1.177 +} 1.178 + 1.179 +// Swap words in a longword. 1.180 +static unsigned long swap(unsigned long x) { 1.181 + return (x << 32) | (x >> 32); 1.182 +} 1.183 + 1.184 +// Copy len longwords from s to d, word-swapping as we go. The 1.185 +// destination array is reversed. 1.186 +static void reverse_words(unsigned long *s, unsigned long *d, int len) { 1.187 + d += len; 1.188 + while(len-- > 0) { 1.189 + d--; 1.190 + *d = swap(*s); 1.191 + s++; 1.192 + } 1.193 +} 1.194 + 1.195 +// The threshold at which squaring is advantageous was determined 1.196 +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 1.197 +#define MONTGOMERY_SQUARING_THRESHOLD 64 1.198 + 1.199 +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 1.200 + jint len, jlong inv, 1.201 + jint *m_ints) { 1.202 + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 1.203 + int longwords = len/2; 1.204 + 1.205 + // Make very sure we don't use so much space that the stack might 1.206 + // overflow. 512 jints corresponds to an 16384-bit integer and 1.207 + // will use here a total of 8k bytes of stack space. 1.208 + int total_allocation = longwords * sizeof (unsigned long) * 4; 1.209 + guarantee(total_allocation <= 8192, "must be"); 1.210 + unsigned long *scratch = (unsigned long *)alloca(total_allocation); 1.211 + 1.212 + // Local scratch arrays 1.213 + unsigned long 1.214 + *a = scratch + 0 * longwords, 1.215 + *b = scratch + 1 * longwords, 1.216 + *n = scratch + 2 * longwords, 1.217 + *m = scratch + 3 * longwords; 1.218 + 1.219 + reverse_words((unsigned long *)a_ints, a, longwords); 1.220 + reverse_words((unsigned long *)b_ints, b, longwords); 1.221 + reverse_words((unsigned long *)n_ints, n, longwords); 1.222 + 1.223 + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); 1.224 + 1.225 + reverse_words(m, (unsigned long *)m_ints, longwords); 1.226 +} 1.227 + 1.228 +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 1.229 + jint len, jlong inv, 1.230 + jint *m_ints) { 1.231 + assert(len % 2 == 0, "array length in montgomery_square must be even"); 1.232 + int longwords = len/2; 1.233 + 1.234 + // Make very sure we don't use so much space that the stack might 1.235 + // overflow. 512 jints corresponds to an 16384-bit integer and 1.236 + // will use here a total of 6k bytes of stack space. 1.237 + int total_allocation = longwords * sizeof (unsigned long) * 3; 1.238 + guarantee(total_allocation <= 8192, "must be"); 1.239 + unsigned long *scratch = (unsigned long *)alloca(total_allocation); 1.240 + 1.241 + // Local scratch arrays 1.242 + unsigned long 1.243 + *a = scratch + 0 * longwords, 1.244 + *n = scratch + 1 * longwords, 1.245 + *m = scratch + 2 * longwords; 1.246 + 1.247 + reverse_words((unsigned long *)a_ints, a, longwords); 1.248 + reverse_words((unsigned long *)n_ints, n, longwords); 1.249 + 1.250 + //montgomery_square fails to pass BigIntegerTest on solaris amd64 1.251 + //on jdk7 and jdk8. 1.252 +#ifndef SOLARIS 1.253 + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 1.254 +#else 1.255 + if (0) { 1.256 +#endif 1.257 + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); 1.258 + } else { 1.259 + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); 1.260 + } 1.261 + 1.262 + reverse_words(m, (unsigned long *)m_ints, longwords); 1.263 +} 1.264 + 1.265 +#endif // WINDOWS 1.266 + 1.267 #ifdef COMPILER2 1.268 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 1.269 //
2.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Feb 17 13:40:12 2016 +0300 2.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Mar 04 16:15:48 2016 +0300 2.3 @@ -4094,7 +4094,18 @@ 2.4 if (UseMulAddIntrinsic) { 2.5 StubRoutines::_mulAdd = generate_mulAdd(); 2.6 } 2.7 -#endif 2.8 + 2.9 +#ifndef _WINDOWS 2.10 + if (UseMontgomeryMultiplyIntrinsic) { 2.11 + StubRoutines::_montgomeryMultiply 2.12 + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); 2.13 + } 2.14 + if (UseMontgomerySquareIntrinsic) { 2.15 + StubRoutines::_montgomerySquare 2.16 + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); 2.17 + } 2.18 +#endif // WINDOWS 2.19 +#endif // COMPILER2 2.20 } 2.21 2.22 public:
3.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Wed Feb 17 13:40:12 2016 +0300 3.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri Mar 04 16:15:48 2016 +0300 3.3 @@ -709,6 +709,12 @@ 3.4 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 3.5 UseMulAddIntrinsic = true; 3.6 } 3.7 + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 3.8 + UseMontgomeryMultiplyIntrinsic = true; 3.9 + } 3.10 + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 3.11 + UseMontgomerySquareIntrinsic = true; 3.12 + } 3.13 #else 3.14 if (UseMultiplyToLenIntrinsic) { 3.15 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 3.16 @@ -728,6 +734,18 @@ 3.17 } 3.18 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); 3.19 } 3.20 + if (UseMontgomeryMultiplyIntrinsic) { 3.21 + if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 3.22 + warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); 3.23 + } 3.24 + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); 3.25 + } 3.26 + if (UseMontgomerySquareIntrinsic) { 3.27 + if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 3.28 + warning("montgomerySquare intrinsic is not available in 32-bit VM"); 3.29 + } 3.30 + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); 3.31 + } 3.32 #endif 3.33 #endif // COMPILER2 3.34
4.1 --- a/src/share/vm/classfile/vmSymbols.hpp Wed Feb 17 13:40:12 2016 +0300 4.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Fri Mar 04 16:15:48 2016 +0300 4.3 @@ -789,7 +789,7 @@ 4.4 do_signature(encodeISOArray_signature, "([CI[BII)I") \ 4.5 \ 4.6 do_class(java_math_BigInteger, "java/math/BigInteger") \ 4.7 - do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R) \ 4.8 + do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S) \ 4.9 do_name( multiplyToLen_name, "multiplyToLen") \ 4.10 do_signature(multiplyToLen_signature, "([II[II[I)[I") \ 4.11 \ 4.12 @@ -801,6 +801,14 @@ 4.13 do_name( mulAdd_name, "implMulAdd") \ 4.14 do_signature(mulAdd_signature, "([I[IIII)I") \ 4.15 \ 4.16 + do_intrinsic(_montgomeryMultiply, java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \ 4.17 + do_name( montgomeryMultiply_name, "implMontgomeryMultiply") \ 4.18 + do_signature(montgomeryMultiply_signature, "([I[I[IIJ[I)[I") \ 4.19 + \ 4.20 + do_intrinsic(_montgomerySquare, java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S) \ 4.21 + do_name( montgomerySquare_name, "implMontgomerySquare") \ 4.22 + do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \ 4.23 + \ 4.24 /* java/lang/ref/Reference */ \ 4.25 do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ 4.26 \
5.1 --- a/src/share/vm/opto/c2_globals.hpp Wed Feb 17 13:40:12 2016 +0300 5.2 +++ b/src/share/vm/opto/c2_globals.hpp Fri Mar 04 16:15:48 2016 +0300 5.3 @@ -665,6 +665,12 @@ 5.4 product(bool, UseMulAddIntrinsic, false, \ 5.5 "Enables intrinsification of BigInteger.mulAdd()") \ 5.6 \ 5.7 + product(bool, UseMontgomeryMultiplyIntrinsic, false, \ 5.8 + "Enables intrinsification of BigInteger.montgomeryMultiply()") \ 5.9 + \ 5.10 + product(bool, UseMontgomerySquareIntrinsic, false, \ 5.11 + "Enables intrinsification of BigInteger.montgomerySquare()") \ 5.12 + \ 5.13 product(bool, UseTypeSpeculation, true, \ 5.14 "Speculatively propagate types from profiles") \ 5.15 \
6.1 --- a/src/share/vm/opto/escape.cpp Wed Feb 17 13:40:12 2016 +0300 6.2 +++ b/src/share/vm/opto/escape.cpp Fri Mar 04 16:15:48 2016 +0300 6.3 @@ -960,8 +960,10 @@ 6.4 strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 || 6.5 strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 || 6.6 strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 || 6.7 - strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0) 6.8 - ))) { 6.9 + strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 || 6.10 + strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 || 6.11 + strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0) 6.12 + ))) { 6.13 call->dump(); 6.14 fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); 6.15 }
7.1 --- a/src/share/vm/opto/library_call.cpp Wed Feb 17 13:40:12 2016 +0300 7.2 +++ b/src/share/vm/opto/library_call.cpp Fri Mar 04 16:15:48 2016 +0300 7.3 @@ -326,6 +326,8 @@ 7.4 bool inline_multiplyToLen(); 7.5 bool inline_squareToLen(); 7.6 bool inline_mulAdd(); 7.7 + bool inline_montgomeryMultiply(); 7.8 + bool inline_montgomerySquare(); 7.9 7.10 bool inline_profileBoolean(); 7.11 }; 7.12 @@ -537,6 +539,13 @@ 7.13 if (!UseMulAddIntrinsic) return NULL; 7.14 break; 7.15 7.16 + case vmIntrinsics::_montgomeryMultiply: 7.17 + if (!UseMontgomeryMultiplyIntrinsic) return NULL; 7.18 + break; 7.19 + case vmIntrinsics::_montgomerySquare: 7.20 + if (!UseMontgomerySquareIntrinsic) return NULL; 7.21 + break; 7.22 + 7.23 case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: 7.24 case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: 7.25 if (!UseAESIntrinsics) return NULL; 7.26 @@ -943,6 +952,11 @@ 7.27 case vmIntrinsics::_mulAdd: 7.28 return inline_mulAdd(); 7.29 7.30 + case vmIntrinsics::_montgomeryMultiply: 7.31 + return inline_montgomeryMultiply(); 7.32 + case vmIntrinsics::_montgomerySquare: 7.33 + return inline_montgomerySquare(); 7.34 + 7.35 case vmIntrinsics::_encodeISOArray: 7.36 return inline_encodeISOArray(); 7.37 7.38 @@ -5783,11 +5797,12 @@ 7.39 7.40 assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters"); 7.41 7.42 - Node* x = argument(1); 7.43 - Node* xlen = argument(2); 7.44 - Node* y = argument(3); 7.45 - Node* ylen = argument(4); 7.46 - Node* z = argument(5); 7.47 + // no receiver because it is a static method 7.48 + Node* x = argument(0); 7.49 + Node* xlen = argument(1); 7.50 + Node* y = argument(2); 7.51 + Node* ylen = argument(3); 7.52 + Node* z = argument(4); 7.53 7.54 const Type* x_type = x->Value(&_gvn); 7.55 const Type* y_type = y->Value(&_gvn); 7.56 @@ -5966,6 +5981,121 @@ 7.57 return true; 7.58 } 7.59 7.60 +//-------------inline_montgomeryMultiply----------------------------------- 7.61 +bool LibraryCallKit::inline_montgomeryMultiply() { 7.62 + address stubAddr = StubRoutines::montgomeryMultiply(); 7.63 + if (stubAddr == NULL) { 7.64 + return false; // Intrinsic's stub is not implemented on this platform 7.65 + } 7.66 + 7.67 + assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform"); 7.68 + const char* stubName = "montgomery_square"; 7.69 + 7.70 + assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters"); 7.71 + 7.72 + Node* a = argument(0); 7.73 + Node* b = argument(1); 7.74 + Node* n = argument(2); 7.75 + Node* len = argument(3); 7.76 + Node* inv = argument(4); 7.77 + Node* m = argument(6); 7.78 + 7.79 + const Type* a_type = a->Value(&_gvn); 7.80 + const TypeAryPtr* top_a = a_type->isa_aryptr(); 7.81 + const Type* b_type = b->Value(&_gvn); 7.82 + const TypeAryPtr* top_b = b_type->isa_aryptr(); 7.83 + const Type* n_type = a->Value(&_gvn); 7.84 + const TypeAryPtr* top_n = n_type->isa_aryptr(); 7.85 + const Type* m_type = a->Value(&_gvn); 7.86 + const TypeAryPtr* top_m = m_type->isa_aryptr(); 7.87 + if (top_a == NULL || top_a->klass() == NULL || 7.88 + top_b == NULL || top_b->klass() == NULL || 7.89 + top_n == NULL || top_n->klass() == NULL || 7.90 + top_m == NULL || top_m->klass() == NULL) { 7.91 + // failed array check 7.92 + return false; 7.93 + } 7.94 + 7.95 + BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.96 + BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.97 + BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.98 + BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.99 + if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) { 7.100 + return false; 7.101 + } 7.102 + 7.103 + // Make the call 7.104 + { 7.105 + Node* a_start = array_element_address(a, intcon(0), a_elem); 7.106 + Node* b_start = array_element_address(b, intcon(0), b_elem); 7.107 + Node* n_start = array_element_address(n, intcon(0), n_elem); 7.108 + Node* m_start = array_element_address(m, intcon(0), m_elem); 7.109 + 7.110 + Node* call = make_runtime_call(RC_LEAF, 7.111 + OptoRuntime::montgomeryMultiply_Type(), 7.112 + stubAddr, stubName, TypePtr::BOTTOM, 7.113 + a_start, b_start, n_start, len, inv, top(), 7.114 + m_start); 7.115 + set_result(m); 7.116 + } 7.117 + 7.118 + return true; 7.119 +} 7.120 + 7.121 +bool LibraryCallKit::inline_montgomerySquare() { 7.122 + address stubAddr = StubRoutines::montgomerySquare(); 7.123 + if (stubAddr == NULL) { 7.124 + return false; // Intrinsic's stub is not implemented on this platform 7.125 + } 7.126 + 7.127 + assert(UseMontgomerySquareIntrinsic, "not implemented on this platform"); 7.128 + const char* stubName = "montgomery_square"; 7.129 + 7.130 + assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters"); 7.131 + 7.132 + Node* a = argument(0); 7.133 + Node* n = argument(1); 7.134 + Node* len = argument(2); 7.135 + Node* inv = argument(3); 7.136 + Node* m = argument(5); 7.137 + 7.138 + const Type* a_type = a->Value(&_gvn); 7.139 + const TypeAryPtr* top_a = a_type->isa_aryptr(); 7.140 + const Type* n_type = a->Value(&_gvn); 7.141 + const TypeAryPtr* top_n = n_type->isa_aryptr(); 7.142 + const Type* m_type = a->Value(&_gvn); 7.143 + const TypeAryPtr* top_m = m_type->isa_aryptr(); 7.144 + if (top_a == NULL || top_a->klass() == NULL || 7.145 + top_n == NULL || top_n->klass() == NULL || 7.146 + top_m == NULL || top_m->klass() == NULL) { 7.147 + // failed array check 7.148 + return false; 7.149 + } 7.150 + 7.151 + BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.152 + BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.153 + BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); 7.154 + if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) { 7.155 + return false; 7.156 + } 7.157 + 7.158 + // Make the call 7.159 + { 7.160 + Node* a_start = array_element_address(a, intcon(0), a_elem); 7.161 + Node* n_start = array_element_address(n, intcon(0), n_elem); 7.162 + Node* m_start = array_element_address(m, intcon(0), m_elem); 7.163 + 7.164 + Node* call = make_runtime_call(RC_LEAF, 7.165 + OptoRuntime::montgomerySquare_Type(), 7.166 + stubAddr, stubName, TypePtr::BOTTOM, 7.167 + a_start, n_start, len, inv, top(), 7.168 + m_start); 7.169 + set_result(m); 7.170 + } 7.171 + 7.172 + return true; 7.173 +} 7.174 + 7.175 7.176 /** 7.177 * Calculate CRC32 for byte.
8.1 --- a/src/share/vm/opto/runtime.cpp Wed Feb 17 13:40:12 2016 +0300 8.2 +++ b/src/share/vm/opto/runtime.cpp Fri Mar 04 16:15:48 2016 +0300 8.3 @@ -998,6 +998,52 @@ 8.4 return TypeFunc::make(domain, range); 8.5 } 8.6 8.7 +const TypeFunc* OptoRuntime::montgomeryMultiply_Type() { 8.8 + // create input type (domain) 8.9 + int num_args = 7; 8.10 + int argcnt = num_args; 8.11 + const Type** fields = TypeTuple::fields(argcnt); 8.12 + int argp = TypeFunc::Parms; 8.13 + fields[argp++] = TypePtr::NOTNULL; // a 8.14 + fields[argp++] = TypePtr::NOTNULL; // b 8.15 + fields[argp++] = TypePtr::NOTNULL; // n 8.16 + fields[argp++] = TypeInt::INT; // len 8.17 + fields[argp++] = TypeLong::LONG; // inv 8.18 + fields[argp++] = Type::HALF; 8.19 + fields[argp++] = TypePtr::NOTNULL; // result 8.20 + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); 8.21 + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); 8.22 + 8.23 + // result type needed 8.24 + fields = TypeTuple::fields(1); 8.25 + fields[TypeFunc::Parms+0] = TypePtr::NOTNULL; 8.26 + 8.27 + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); 8.28 + return TypeFunc::make(domain, range); 8.29 +} 8.30 + 8.31 +const TypeFunc* OptoRuntime::montgomerySquare_Type() { 8.32 + // create input type (domain) 8.33 + int num_args = 6; 8.34 + int argcnt = num_args; 8.35 + const Type** fields = TypeTuple::fields(argcnt); 8.36 + int argp = TypeFunc::Parms; 8.37 + fields[argp++] = TypePtr::NOTNULL; // a 8.38 + fields[argp++] = TypePtr::NOTNULL; // n 8.39 + fields[argp++] = TypeInt::INT; // len 8.40 + fields[argp++] = TypeLong::LONG; // inv 8.41 + fields[argp++] = Type::HALF; 8.42 + fields[argp++] = TypePtr::NOTNULL; // result 8.43 + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); 8.44 + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); 8.45 + 8.46 + // result type needed 8.47 + fields = TypeTuple::fields(1); 8.48 + fields[TypeFunc::Parms+0] = TypePtr::NOTNULL; 8.49 + 8.50 + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); 8.51 + return TypeFunc::make(domain, range); 8.52 +} 8.53 8.54 8.55 //------------- Interpreter state access for on stack replacement
9.1 --- a/src/share/vm/opto/runtime.hpp Wed Feb 17 13:40:12 2016 +0300 9.2 +++ b/src/share/vm/opto/runtime.hpp Fri Mar 04 16:15:48 2016 +0300 9.3 @@ -308,6 +308,8 @@ 9.4 static const TypeFunc* squareToLen_Type(); 9.5 9.6 static const TypeFunc* mulAdd_Type(); 9.7 + static const TypeFunc* montgomeryMultiply_Type(); 9.8 + static const TypeFunc* montgomerySquare_Type(); 9.9 9.10 static const TypeFunc* updateBytesCRC32_Type(); 9.11
10.1 --- a/src/share/vm/runtime/sharedRuntime.hpp Wed Feb 17 13:40:12 2016 +0300 10.2 +++ b/src/share/vm/runtime/sharedRuntime.hpp Fri Mar 04 16:15:48 2016 +0300 10.3 @@ -145,6 +145,12 @@ 10.4 static double dsqrt(double f); 10.5 #endif 10.6 10.7 + // Montgomery multiplication 10.8 + static void montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 10.9 + jint len, jlong inv, jint *m_ints); 10.10 + static void montgomery_square(jint *a_ints, jint *n_ints, 10.11 + jint len, jlong inv, jint *m_ints); 10.12 + 10.13 #ifdef __SOFTFP__ 10.14 // C++ compiler generates soft float instructions as well as passing 10.15 // float and double in registers.
11.1 --- a/src/share/vm/runtime/stubRoutines.cpp Wed Feb 17 13:40:12 2016 +0300 11.2 +++ b/src/share/vm/runtime/stubRoutines.cpp Fri Mar 04 16:15:48 2016 +0300 11.3 @@ -138,6 +138,8 @@ 11.4 address StubRoutines::_multiplyToLen = NULL; 11.5 address StubRoutines::_squareToLen = NULL; 11.6 address StubRoutines::_mulAdd = NULL; 11.7 +address StubRoutines::_montgomeryMultiply = NULL; 11.8 +address StubRoutines::_montgomerySquare = NULL; 11.9 11.10 double (* StubRoutines::_intrinsic_log )(double) = NULL; 11.11 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
12.1 --- a/src/share/vm/runtime/stubRoutines.hpp Wed Feb 17 13:40:12 2016 +0300 12.2 +++ b/src/share/vm/runtime/stubRoutines.hpp Fri Mar 04 16:15:48 2016 +0300 12.3 @@ -211,6 +211,8 @@ 12.4 static address _multiplyToLen; 12.5 static address _squareToLen; 12.6 static address _mulAdd; 12.7 + static address _montgomeryMultiply; 12.8 + static address _montgomerySquare; 12.9 12.10 // These are versions of the java.lang.Math methods which perform 12.11 // the same operations as the intrinsic version. They are used for 12.12 @@ -371,6 +373,8 @@ 12.13 static address multiplyToLen() {return _multiplyToLen; } 12.14 static address squareToLen() {return _squareToLen; } 12.15 static address mulAdd() {return _mulAdd; } 12.16 + static address montgomeryMultiply() { return _montgomeryMultiply; } 12.17 + static address montgomerySquare() { return _montgomerySquare; } 12.18 12.19 static address select_fill_function(BasicType t, bool aligned, const char* &name); 12.20
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java Fri Mar 04 16:15:48 2016 +0300 13.3 @@ -0,0 +1,278 @@ 13.4 +// 13.5 +// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 13.6 +// Copyright (c) 2015, Red Hat Inc. All rights reserved. 13.7 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 13.8 +// 13.9 +// This code is free software; you can redistribute it and/or modify it 13.10 +// under the terms of the GNU General Public License version 2 only, as 13.11 +// published by the Free Software Foundation. 13.12 +// 13.13 +// This code is distributed in the hope that it will be useful, but WITHOUT 13.14 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13.15 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13.16 +// version 2 for more details (a copy is included in the LICENSE file that 13.17 +// accompanied this code). 13.18 +// 13.19 +// You should have received a copy of the GNU General Public License version 13.20 +// 2 along with this work; if not, write to the Free Software Foundation, 13.21 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 13.22 +// 13.23 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 13.24 +// or visit www.oracle.com if you need additional information or have any 13.25 +// questions. 13.26 +// 13.27 +// 13.28 + 13.29 +import java.lang.invoke.MethodHandle; 13.30 +import java.lang.invoke.MethodHandles; 13.31 +import java.lang.invoke.MethodType; 13.32 +import java.lang.reflect.Constructor; 13.33 +import java.lang.reflect.Field; 13.34 +import java.lang.reflect.Method; 13.35 +import java.math.BigInteger; 13.36 +import java.util.Arrays; 13.37 +import java.util.Random; 13.38 + 13.39 +/** 13.40 + * @test 13.41 + * @bug 8130150 13.42 + * @library /testlibrary 13.43 + * @requires (os.simpleArch == "x64") & (os.family != "windows") 13.44 + * @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments. 13.45 + */ 13.46 + 13.47 +public class MontgomeryMultiplyTest { 13.48 + 13.49 + static final MethodHandles.Lookup lookup = MethodHandles.lookup(); 13.50 + 13.51 + static final MethodHandle montgomeryMultiplyHandle, montgomerySquareHandle; 13.52 + static final MethodHandle bigIntegerConstructorHandle; 13.53 + static final Field bigIntegerMagField; 13.54 + 13.55 + static { 13.56 + // Use reflection to gain access to the methods we want to test. 13.57 + try { 13.58 + Method m = BigInteger.class.getDeclaredMethod("montgomeryMultiply", 13.59 + /*a*/int[].class, /*b*/int[].class, /*n*/int[].class, /*len*/int.class, 13.60 + /*inv*/long.class, /*product*/int[].class); 13.61 + m.setAccessible(true); 13.62 + montgomeryMultiplyHandle = lookup.unreflect(m); 13.63 + 13.64 + m = BigInteger.class.getDeclaredMethod("montgomerySquare", 13.65 + /*a*/int[].class, /*n*/int[].class, /*len*/int.class, 13.66 + /*inv*/long.class, /*product*/int[].class); 13.67 + m.setAccessible(true); 13.68 + montgomerySquareHandle = lookup.unreflect(m); 13.69 + 13.70 + Constructor c 13.71 + = BigInteger.class.getDeclaredConstructor(int.class, int[].class); 13.72 + c.setAccessible(true); 13.73 + bigIntegerConstructorHandle = lookup.unreflectConstructor(c); 13.74 + 13.75 + bigIntegerMagField = BigInteger.class.getDeclaredField("mag"); 13.76 + bigIntegerMagField.setAccessible(true); 13.77 + 13.78 + } catch (Throwable ex) { 13.79 + throw new RuntimeException(ex); 13.80 + } 13.81 + } 13.82 + 13.83 + // Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare. 13.84 + int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv, 13.85 + int[] product) throws Throwable { 13.86 + int[] result = 13.87 + (a == b) ? (int[]) montgomerySquareHandle.invokeExact(a, n, len, inv, product) 13.88 + : (int[]) montgomeryMultiplyHandle.invokeExact(a, b, n, len, inv, product); 13.89 + return Arrays.copyOf(result, len); 13.90 + } 13.91 + 13.92 + // Invoke the private constructor BigInteger(int[]). 13.93 + BigInteger newBigInteger(int[] val) throws Throwable { 13.94 + return (BigInteger) bigIntegerConstructorHandle.invokeExact(1, val); 13.95 + } 13.96 + 13.97 + // Get the private field BigInteger.mag 13.98 + int[] mag(BigInteger n) { 13.99 + try { 13.100 + return (int[]) bigIntegerMagField.get(n); 13.101 + } catch (Exception ex) { 13.102 + throw new RuntimeException(ex); 13.103 + } 13.104 + } 13.105 + 13.106 + // Montgomery multiplication 13.107 + // Calculate a * b * r^-1 mod n) 13.108 + // 13.109 + // R is a power of the word size 13.110 + // N' = R^-1 mod N 13.111 + // 13.112 + // T := ab 13.113 + // m := (T mod R)N' mod R [so 0 <= m < R] 13.114 + // t := (T + mN)/R 13.115 + // if t >= N then return t - N else return t 13.116 + // 13.117 + BigInteger montgomeryMultiply(BigInteger a, BigInteger b, BigInteger N, 13.118 + int len, BigInteger n_prime) 13.119 + throws Throwable { 13.120 + BigInteger T = a.multiply(b); 13.121 + BigInteger R = BigInteger.ONE.shiftLeft(len*32); 13.122 + BigInteger mask = R.subtract(BigInteger.ONE); 13.123 + BigInteger m = (T.and(mask)).multiply(n_prime); 13.124 + m = m.and(mask); // i.e. m.mod(R) 13.125 + T = T.add(m.multiply(N)); 13.126 + T = T.shiftRight(len*32); // i.e. T.divide(R) 13.127 + if (T.compareTo(N) > 0) { 13.128 + T = T.subtract(N); 13.129 + } 13.130 + return T; 13.131 + } 13.132 + 13.133 + // Call the Montgomery multiply intrinsic. 13.134 + BigInteger montgomeryMultiply(int[] a_words, int[] b_words, int[] n_words, 13.135 + int len, BigInteger inv) 13.136 + throws Throwable { 13.137 + BigInteger t = montgomeryMultiply( 13.138 + newBigInteger(a_words), 13.139 + newBigInteger(b_words), 13.140 + newBigInteger(n_words), 13.141 + len, inv); 13.142 + return t; 13.143 + } 13.144 + 13.145 + // Check that the Montgomery multiply intrinsic returns the same 13.146 + // result as the longhand calculation. 13.147 + void check(int[] a_words, int[] b_words, int[] n_words, int len, BigInteger inv) 13.148 + throws Throwable { 13.149 + BigInteger n = newBigInteger(n_words); 13.150 + BigInteger slow = montgomeryMultiply(a_words, b_words, n_words, len, inv); 13.151 + BigInteger fast 13.152 + = newBigInteger(montgomeryMultiply 13.153 + (a_words, b_words, n_words, len, inv.longValue(), null)); 13.154 + // The intrinsic may not return the same value as the longhand 13.155 + // calculation but they must have the same residue mod N. 13.156 + if (!slow.mod(n).equals(fast.mod(n))) { 13.157 + throw new RuntimeException(); 13.158 + } 13.159 + } 13.160 + 13.161 + Random rnd = new Random(0); 13.162 + 13.163 + // Return a random value of length <= bits in an array of even length 13.164 + int[] random_val(int bits) { 13.165 + int len = (bits+63)/64; // i.e. length in longs 13.166 + int[] val = new int[len*2]; 13.167 + for (int i = 0; i < val.length; i++) 13.168 + val[i] = rnd.nextInt(); 13.169 + int leadingZeros = 64 - (bits & 64); 13.170 + if (leadingZeros >= 32) { 13.171 + val[0] = 0; 13.172 + val[1] &= ~(-1l << (leadingZeros & 31)); 13.173 + } else { 13.174 + val[0] &= ~(-1l << leadingZeros); 13.175 + } 13.176 + return val; 13.177 + } 13.178 + 13.179 + void testOneLength(int lenInBits, int lenInInts) throws Throwable { 13.180 + BigInteger mod = new BigInteger(lenInBits, 2, rnd); 13.181 + BigInteger r = BigInteger.ONE.shiftLeft(lenInInts * 32); 13.182 + BigInteger n_prime = mod.modInverse(r).negate(); 13.183 + 13.184 + // Make n.length even, padding with a zero if necessary 13.185 + int[] n = mag(mod); 13.186 + if (n.length < lenInInts) { 13.187 + int[] x = new int[lenInInts]; 13.188 + System.arraycopy(n, 0, x, lenInInts-n.length, n.length); 13.189 + n = x; 13.190 + } 13.191 + 13.192 + for (int i = 0; i < 10000; i++) { 13.193 + // multiply 13.194 + check(random_val(lenInBits), random_val(lenInBits), n, lenInInts, n_prime); 13.195 + // square 13.196 + int[] tmp = random_val(lenInBits); 13.197 + check(tmp, tmp, n, lenInInts, n_prime); 13.198 + } 13.199 + } 13.200 + 13.201 + // Test the Montgomery multiply intrinsic with a bunch of random 13.202 + // values of varying lengths. Do this for long enough that the 13.203 + // caller of the intrinsic is C2-compiled. 13.204 + void testResultValues() throws Throwable { 13.205 + // Test a couple of interesting edge cases. 13.206 + testOneLength(1024, 32); 13.207 + testOneLength(1025, 34); 13.208 + for (int j = 10; j > 0; j--) { 13.209 + // Construct a random prime whose length in words is even 13.210 + int lenInBits = rnd.nextInt(2048) + 64; 13.211 + int lenInInts = (lenInBits + 63)/64*2; 13.212 + testOneLength(lenInBits, lenInInts); 13.213 + } 13.214 + } 13.215 + 13.216 + // Range checks 13.217 + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, int[] n, int len, long inv, 13.218 + int[] product, Class klass) { 13.219 + try { 13.220 + montgomeryMultiply(a, b, n, len, inv, product); 13.221 + } catch (Throwable ex) { 13.222 + if (klass.isAssignableFrom(ex.getClass())) 13.223 + return; 13.224 + throw new RuntimeException(klass + " expected, " + ex + " was thrown"); 13.225 + } 13.226 + throw new RuntimeException(klass + " expected, was not thrown"); 13.227 + } 13.228 + 13.229 + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv, 13.230 + Class klass) { 13.231 + testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), null, klass); 13.232 + } 13.233 + 13.234 + void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv, 13.235 + int[] product, Class klass) { 13.236 + testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), product, klass); 13.237 + } 13.238 + 13.239 + void testMontgomeryMultiplyChecks() { 13.240 + int[] blah = random_val(40); 13.241 + int[] small = random_val(39); 13.242 + BigInteger mod = new BigInteger(40*32 , 2, rnd); 13.243 + BigInteger r = BigInteger.ONE.shiftLeft(40*32); 13.244 + BigInteger n_prime = mod.modInverse(r).negate(); 13.245 + 13.246 + // Length out of range: square 13.247 + testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class); 13.248 + testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class); 13.249 + testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class); 13.250 + // As above, but for multiply 13.251 + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class); 13.252 + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class); 13.253 + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class); 13.254 + 13.255 + // Length odd 13.256 + testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class); 13.257 + testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class); 13.258 + testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class); 13.259 + // As above, but for multiply 13.260 + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class); 13.261 + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class); 13.262 + testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class); 13.263 + 13.264 + // array too small 13.265 + testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class); 13.266 + testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class); 13.267 + testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class); 13.268 + testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class); 13.269 + testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class); 13.270 + testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class); 13.271 + } 13.272 + 13.273 + public static void main(String args[]) { 13.274 + try { 13.275 + new MontgomeryMultiplyTest().testMontgomeryMultiplyChecks(); 13.276 + new MontgomeryMultiplyTest().testResultValues(); 13.277 + } catch (Throwable ex) { 13.278 + throw new RuntimeException(ex); 13.279 + } 13.280 + } 13.281 +}