Sat, 14 Oct 2017 12:05:34 +0000
Merge
1.1 --- a/.hgtags Sat Oct 14 00:42:33 2017 -0700 1.2 +++ b/.hgtags Sat Oct 14 12:05:34 2017 +0000 1.3 @@ -992,3 +992,4 @@ 1.4 0bd600d6d77b5b41780074bcbfa133032dadf657 jdk8u152-b04 1.5 68758c5ab0c1ef01e89bea8a9b799714831a177f jdk8u152-b05 1.6 7b96cfeed22242bb68a387d1680e602e37e48050 jdk8u162-b00 1.7 +92693f9dd704467ddd5fbae5a5908c1713a08ee0 jdk8u162-b01
2.1 --- a/src/cpu/ppc/vm/assembler_ppc.hpp Sat Oct 14 00:42:33 2017 -0700 2.2 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp Sat Oct 14 12:05:34 2017 +0000 2.3 @@ -1180,6 +1180,8 @@ 2.4 inline void mullw_( Register d, Register a, Register b); 2.5 inline void mulhw( Register d, Register a, Register b); 2.6 inline void mulhw_( Register d, Register a, Register b); 2.7 + inline void mulhwu( Register d, Register a, Register b); 2.8 + inline void mulhwu_(Register d, Register a, Register b); 2.9 inline void mulhd( Register d, Register a, Register b); 2.10 inline void mulhd_( Register d, Register a, Register b); 2.11 inline void mulhdu( Register d, Register a, Register b);
3.1 --- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp Sat Oct 14 00:42:33 2017 -0700 3.2 +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp Sat Oct 14 12:05:34 2017 +0000 3.3 @@ -109,6 +109,8 @@ 3.4 inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } 3.5 inline void Assembler::mulhw( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } 3.6 inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } 3.7 +inline void Assembler::mulhwu( Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } 3.8 +inline void Assembler::mulhwu_(Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } 3.9 inline void Assembler::mulhd( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } 3.10 inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } 3.11 inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
4.1 --- a/src/cpu/ppc/vm/c2_init_ppc.cpp Sat Oct 14 00:42:33 2017 -0700 4.2 +++ b/src/cpu/ppc/vm/c2_init_ppc.cpp Sat Oct 14 12:05:34 2017 +0000 4.3 @@ -45,4 +45,10 @@ 4.4 FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true); 4.5 } 4.6 } 4.7 + 4.8 + if (OptimizeFill) { 4.9 + warning("OptimizeFill is not supported on this CPU."); 4.10 + FLAG_SET_DEFAULT(OptimizeFill, false); 4.11 + } 4.12 + 4.13 }
5.1 --- a/src/cpu/ppc/vm/ppc.ad Sat Oct 14 00:42:33 2017 -0700 5.2 +++ b/src/cpu/ppc/vm/ppc.ad Sat Oct 14 12:05:34 2017 +0000 5.3 @@ -1,6 +1,6 @@ 5.4 // 5.5 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 5.6 -// Copyright 2012, 2014 SAP AG. All rights reserved. 5.7 +// Copyright (c) 2012, 2017 SAP SE. All rights reserved. 5.8 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5.9 // 5.10 // This code is free software; you can redistribute it and/or modify it 5.11 @@ -8610,6 +8610,44 @@ 5.12 ins_pipe(pipe_class_default); 5.13 %} 5.14 5.15 +// Bitfield Extract: URShiftI + AndI 5.16 +instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{ 5.17 + match(Set dst (AndI (URShiftI src1 src2) src3)); 5.18 + 5.19 + format %{ "EXTRDI $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %} 5.20 + size(4); 5.21 + ins_encode %{ 5.22 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 5.23 + int rshift = ($src2$$constant) & 0x1f; 5.24 + int length = log2_long(((jlong) $src3$$constant) + 1); 5.25 + if (rshift + length > 32) { 5.26 + // if necessary, adjust mask to omit rotated bits. 5.27 + length = 32 - rshift; 5.28 + } 5.29 + __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length)); 5.30 + %} 5.31 + ins_pipe(pipe_class_default); 5.32 +%} 5.33 + 5.34 +// Bitfield Extract: URShiftL + AndL 5.35 +instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{ 5.36 + match(Set dst (AndL (URShiftL src1 src2) src3)); 5.37 + 5.38 + format %{ "EXTRDI $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %} 5.39 + size(4); 5.40 + ins_encode %{ 5.41 + // TODO: PPC port $archOpcode(ppc64Opcode_rldicl); 5.42 + int rshift = ($src2$$constant) & 0x3f; 5.43 + int length = log2_long(((jlong) $src3$$constant) + 1); 5.44 + if (rshift + length > 64) { 5.45 + // if necessary, adjust mask to omit rotated bits. 5.46 + length = 64 - rshift; 5.47 + } 5.48 + __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length)); 5.49 + %} 5.50 + ins_pipe(pipe_class_default); 5.51 +%} 5.52 + 5.53 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{ 5.54 match(Set dst (ConvL2I (ConvI2L src))); 5.55 5.56 @@ -8889,6 +8927,19 @@ 5.57 ins_pipe(pipe_class_default); 5.58 %} 5.59 5.60 +// Left shifted Immediate And 5.61 +instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2, flagsRegCR0 cr0) %{ 5.62 + match(Set dst (AndI src1 src2)); 5.63 + effect(KILL cr0); 5.64 + format %{ "ANDIS $dst, $src1, $src2.hi" %} 5.65 + size(4); 5.66 + ins_encode %{ 5.67 + // TODO: PPC port $archOpcode(ppc64Opcode_andis_); 5.68 + __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16))); 5.69 + %} 5.70 + ins_pipe(pipe_class_default); 5.71 +%} 5.72 + 5.73 // Immediate And 5.74 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{ 5.75 match(Set dst (AndI src1 src2)); 5.76 @@ -10571,6 +10622,17 @@ 5.77 ins_pipe(pipe_class_compare); 5.78 %} 5.79 5.80 +instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{ 5.81 + match(Set crx (CmpP src1 src2)); 5.82 + format %{ "CMPLDI $crx, $src1, $src2 \t// ptr" %} 5.83 + size(4); 5.84 + ins_encode %{ 5.85 + // TODO: PPC port $archOpcode(ppc64Opcode_cmpl); 5.86 + __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF))); 5.87 + %} 5.88 + ins_pipe(pipe_class_compare); 5.89 +%} 5.90 + 5.91 // Used in postalloc expand. 5.92 instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{ 5.93 // This match rule prevents reordering of node before a safepoint.
6.1 --- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Sat Oct 14 00:42:33 2017 -0700 6.2 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Sat Oct 14 12:05:34 2017 +0000 6.3 @@ -42,6 +42,8 @@ 6.4 #include "opto/runtime.hpp" 6.5 #endif 6.6 6.7 +#include <alloca.h> 6.8 + 6.9 #define __ masm-> 6.10 6.11 #ifdef PRODUCT 6.12 @@ -3268,3 +3270,245 @@ 6.13 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize, 6.14 oop_maps, true); 6.15 } 6.16 + 6.17 + 6.18 +//------------------------------Montgomery multiplication------------------------ 6.19 +// 6.20 + 6.21 +// Subtract 0:b from carry:a. Return carry. 6.22 +static unsigned long 6.23 +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { 6.24 + long i = 0; 6.25 + unsigned long tmp, tmp2; 6.26 + __asm__ __volatile__ ( 6.27 + "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA 6.28 + "mtctr %[len] \n" 6.29 + "0: \n" 6.30 + "ldx %[tmp], %[i], %[a] \n" 6.31 + "ldx %[tmp2], %[i], %[b] \n" 6.32 + "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended 6.33 + "stdx %[tmp], %[i], %[a] \n" 6.34 + "addi %[i], %[i], 8 \n" 6.35 + "bdnz 0b \n" 6.36 + "addme %[tmp], %[carry] \n" // carry + CA - 1 6.37 + : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2) 6.38 + : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len) 6.39 + : "ctr", "xer", "memory" 6.40 + ); 6.41 + return tmp; 6.42 +} 6.43 + 6.44 +// Multiply (unsigned) Long A by Long B, accumulating the double- 6.45 +// length result into the accumulator formed of T0, T1, and T2. 6.46 +inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) { 6.47 + unsigned long hi, lo; 6.48 + __asm__ __volatile__ ( 6.49 + "mulld %[lo], %[A], %[B] \n" 6.50 + "mulhdu %[hi], %[A], %[B] \n" 6.51 + "addc %[T0], %[T0], %[lo] \n" 6.52 + "adde %[T1], %[T1], %[hi] \n" 6.53 + "addze %[T2], %[T2] \n" 6.54 + : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2) 6.55 + : [A]"r"(A), [B]"r"(B) 6.56 + : "xer" 6.57 + ); 6.58 +} 6.59 + 6.60 +// As above, but add twice the double-length result into the 6.61 +// accumulator. 6.62 +inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) { 6.63 + unsigned long hi, lo; 6.64 + __asm__ __volatile__ ( 6.65 + "mulld %[lo], %[A], %[B] \n" 6.66 + "mulhdu %[hi], %[A], %[B] \n" 6.67 + "addc %[T0], %[T0], %[lo] \n" 6.68 + "adde %[T1], %[T1], %[hi] \n" 6.69 + "addze %[T2], %[T2] \n" 6.70 + "addc %[T0], %[T0], %[lo] \n" 6.71 + "adde %[T1], %[T1], %[hi] \n" 6.72 + "addze %[T2], %[T2] \n" 6.73 + : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2) 6.74 + : [A]"r"(A), [B]"r"(B) 6.75 + : "xer" 6.76 + ); 6.77 +} 6.78 + 6.79 +// Fast Montgomery multiplication. The derivation of the algorithm is 6.80 +// in "A Cryptographic Library for the Motorola DSP56000, 6.81 +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237". 6.82 +static void 6.83 +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], 6.84 + unsigned long m[], unsigned long inv, int len) { 6.85 + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 6.86 + int i; 6.87 + 6.88 + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 6.89 + 6.90 + for (i = 0; i < len; i++) { 6.91 + int j; 6.92 + for (j = 0; j < i; j++) { 6.93 + MACC(a[j], b[i-j], t0, t1, t2); 6.94 + MACC(m[j], n[i-j], t0, t1, t2); 6.95 + } 6.96 + MACC(a[i], b[0], t0, t1, t2); 6.97 + m[i] = t0 * inv; 6.98 + MACC(m[i], n[0], t0, t1, t2); 6.99 + 6.100 + assert(t0 == 0, "broken Montgomery multiply"); 6.101 + 6.102 + t0 = t1; t1 = t2; t2 = 0; 6.103 + } 6.104 + 6.105 + for (i = len; i < 2*len; i++) { 6.106 + int j; 6.107 + for (j = i-len+1; j < len; j++) { 6.108 + MACC(a[j], b[i-j], t0, t1, t2); 6.109 + MACC(m[j], n[i-j], t0, t1, t2); 6.110 + } 6.111 + m[i-len] = t0; 6.112 + t0 = t1; t1 = t2; t2 = 0; 6.113 + } 6.114 + 6.115 + while (t0) { 6.116 + t0 = sub(m, n, t0, len); 6.117 + } 6.118 +} 6.119 + 6.120 +// Fast Montgomery squaring. This uses asymptotically 25% fewer 6.121 +// multiplies so it should be up to 25% faster than Montgomery 6.122 +// multiplication. However, its loop control is more complex and it 6.123 +// may actually run slower on some machines. 6.124 +static void 6.125 +montgomery_square(unsigned long a[], unsigned long n[], 6.126 + unsigned long m[], unsigned long inv, int len) { 6.127 + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 6.128 + int i; 6.129 + 6.130 + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 6.131 + 6.132 + for (i = 0; i < len; i++) { 6.133 + int j; 6.134 + int end = (i+1)/2; 6.135 + for (j = 0; j < end; j++) { 6.136 + MACC2(a[j], a[i-j], t0, t1, t2); 6.137 + MACC(m[j], n[i-j], t0, t1, t2); 6.138 + } 6.139 + if ((i & 1) == 0) { 6.140 + MACC(a[j], a[j], t0, t1, t2); 6.141 + } 6.142 + for (; j < i; j++) { 6.143 + MACC(m[j], n[i-j], t0, t1, t2); 6.144 + } 6.145 + m[i] = t0 * inv; 6.146 + MACC(m[i], n[0], t0, t1, t2); 6.147 + 6.148 + assert(t0 == 0, "broken Montgomery square"); 6.149 + 6.150 + t0 = t1; t1 = t2; t2 = 0; 6.151 + } 6.152 + 6.153 + for (i = len; i < 2*len; i++) { 6.154 + int start = i-len+1; 6.155 + int end = start + (len - start)/2; 6.156 + int j; 6.157 + for (j = start; j < end; j++) { 6.158 + MACC2(a[j], a[i-j], t0, t1, t2); 6.159 + MACC(m[j], n[i-j], t0, t1, t2); 6.160 + } 6.161 + if ((i & 1) == 0) { 6.162 + MACC(a[j], a[j], t0, t1, t2); 6.163 + } 6.164 + for (; j < len; j++) { 6.165 + MACC(m[j], n[i-j], t0, t1, t2); 6.166 + } 6.167 + m[i-len] = t0; 6.168 + t0 = t1; t1 = t2; t2 = 0; 6.169 + } 6.170 + 6.171 + while (t0) { 6.172 + t0 = sub(m, n, t0, len); 6.173 + } 6.174 +} 6.175 + 6.176 +// The threshold at which squaring is advantageous was determined 6.177 +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 6.178 +// Doesn't seem to be relevant for Power8 so we use the same value. 6.179 +#define MONTGOMERY_SQUARING_THRESHOLD 64 6.180 + 6.181 +// Copy len longwords from s to d, word-swapping as we go. The 6.182 +// destination array is reversed. 6.183 +static void reverse_words(unsigned long *s, unsigned long *d, int len) { 6.184 + d += len; 6.185 + while(len-- > 0) { 6.186 + d--; 6.187 + unsigned long s_val = *s; 6.188 + // Swap words in a longword on little endian machines. 6.189 +#ifdef VM_LITTLE_ENDIAN 6.190 + s_val = (s_val << 32) | (s_val >> 32); 6.191 +#endif 6.192 + *d = s_val; 6.193 + s++; 6.194 + } 6.195 +} 6.196 + 6.197 +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 6.198 + jint len, jlong inv, 6.199 + jint *m_ints) { 6.200 + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 6.201 + int longwords = len/2; 6.202 + assert(longwords > 0, "unsupported"); 6.203 + 6.204 + // Make very sure we don't use so much space that the stack might 6.205 + // overflow. 512 jints corresponds to an 16384-bit integer and 6.206 + // will use here a total of 8k bytes of stack space. 6.207 + int total_allocation = longwords * sizeof (unsigned long) * 4; 6.208 + guarantee(total_allocation <= 8192, "must be"); 6.209 + unsigned long *scratch = (unsigned long *)alloca(total_allocation); 6.210 + 6.211 + // Local scratch arrays 6.212 + unsigned long 6.213 + *a = scratch + 0 * longwords, 6.214 + *b = scratch + 1 * longwords, 6.215 + *n = scratch + 2 * longwords, 6.216 + *m = scratch + 3 * longwords; 6.217 + 6.218 + reverse_words((unsigned long *)a_ints, a, longwords); 6.219 + reverse_words((unsigned long *)b_ints, b, longwords); 6.220 + reverse_words((unsigned long *)n_ints, n, longwords); 6.221 + 6.222 + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); 6.223 + 6.224 + reverse_words(m, (unsigned long *)m_ints, longwords); 6.225 +} 6.226 + 6.227 +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 6.228 + jint len, jlong inv, 6.229 + jint *m_ints) { 6.230 + assert(len % 2 == 0, "array length in montgomery_square must be even"); 6.231 + int longwords = len/2; 6.232 + assert(longwords > 0, "unsupported"); 6.233 + 6.234 + // Make very sure we don't use so much space that the stack might 6.235 + // overflow. 512 jints corresponds to an 16384-bit integer and 6.236 + // will use here a total of 6k bytes of stack space. 6.237 + int total_allocation = longwords * sizeof (unsigned long) * 3; 6.238 + guarantee(total_allocation <= 8192, "must be"); 6.239 + unsigned long *scratch = (unsigned long *)alloca(total_allocation); 6.240 + 6.241 + // Local scratch arrays 6.242 + unsigned long 6.243 + *a = scratch + 0 * longwords, 6.244 + *n = scratch + 1 * longwords, 6.245 + *m = scratch + 2 * longwords; 6.246 + 6.247 + reverse_words((unsigned long *)a_ints, a, longwords); 6.248 + reverse_words((unsigned long *)n_ints, n, longwords); 6.249 + 6.250 + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 6.251 + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); 6.252 + } else { 6.253 + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); 6.254 + } 6.255 + 6.256 + reverse_words(m, (unsigned long *)m_ints, longwords); 6.257 +}
7.1 --- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp Sat Oct 14 00:42:33 2017 -0700 7.2 +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp Sat Oct 14 12:05:34 2017 +0000 7.3 @@ -2524,6 +2524,14 @@ 7.4 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); 7.5 } 7.6 7.7 + if (UseMontgomeryMultiplyIntrinsic) { 7.8 + StubRoutines::_montgomeryMultiply 7.9 + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); 7.10 + } 7.11 + if (UseMontgomerySquareIntrinsic) { 7.12 + StubRoutines::_montgomerySquare 7.13 + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); 7.14 + } 7.15 } 7.16 7.17 public:
8.1 --- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Sat Oct 14 00:42:33 2017 -0700 8.2 +++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Sat Oct 14 12:05:34 2017 +0000 8.3 @@ -265,7 +265,7 @@ 8.4 __ cmpdi(CCR0, Rmdo, 0); 8.5 __ beq(CCR0, no_mdo); 8.6 8.7 - // Increment backedge counter in the MDO. 8.8 + // Increment invocation counter in the MDO. 8.9 const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); 8.10 __ lwz(Rscratch2, mdo_bc_offs, Rmdo); 8.11 __ addi(Rscratch2, Rscratch2, increment); 8.12 @@ -277,12 +277,12 @@ 8.13 } 8.14 8.15 // Increment counter in MethodCounters*. 8.16 - const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); 8.17 + const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); 8.18 __ bind(no_mdo); 8.19 __ get_method_counters(R19_method, R3_counters, done); 8.20 - __ lwz(Rscratch2, mo_bc_offs, R3_counters); 8.21 + __ lwz(Rscratch2, mo_ic_offs, R3_counters); 8.22 __ addi(Rscratch2, Rscratch2, increment); 8.23 - __ stw(Rscratch2, mo_bc_offs, R3_counters); 8.24 + __ stw(Rscratch2, mo_ic_offs, R3_counters); 8.25 __ load_const_optimized(Rscratch1, mask, R0); 8.26 __ and_(Rscratch1, Rscratch2, Rscratch1); 8.27 __ beq(CCR0, *overflow);
9.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp Sat Oct 14 00:42:33 2017 -0700 9.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Sat Oct 14 12:05:34 2017 +0000 9.3 @@ -201,6 +201,12 @@ 9.4 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 9.5 } 9.6 9.7 + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 9.8 + UseMontgomeryMultiplyIntrinsic = true; 9.9 + } 9.10 + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 9.11 + UseMontgomerySquareIntrinsic = true; 9.12 + } 9.13 } 9.14 9.15 void VM_Version::print_features() {
10.1 --- a/src/os/windows/vm/version.rc Sat Oct 14 00:42:33 2017 -0700 10.2 +++ b/src/os/windows/vm/version.rc Sat Oct 14 12:05:34 2017 +0000 10.3 @@ -36,7 +36,7 @@ 10.4 // 10.5 10.6 VS_VERSION_INFO VERSIONINFO 10.7 - FILEVERSION HS_VER 10.8 + FILEVERSION JDK_VER 10.9 PRODUCTVERSION JDK_VER 10.10 FILEFLAGSMASK 0x3fL 10.11 #ifdef _DEBUG 10.12 @@ -56,7 +56,7 @@ 10.13 BEGIN 10.14 VALUE "CompanyName", XSTR(HS_COMPANY) "\0" 10.15 VALUE "FileDescription", XSTR(HS_FILEDESC) "\0" 10.16 - VALUE "FileVersion", XSTR(HS_DOTVER) "\0" 10.17 + VALUE "FileVersion", XSTR(JDK_DOTVER) "\0" 10.18 VALUE "Full Version", XSTR(HS_BUILD_ID) "\0" 10.19 VALUE "InternalName", XSTR(HS_INTERNAL_NAME) "\0" 10.20 VALUE "LegalCopyright", XSTR(HS_COPYRIGHT) "\0"
11.1 --- a/src/share/vm/opto/library_call.cpp Sat Oct 14 00:42:33 2017 -0700 11.2 +++ b/src/share/vm/opto/library_call.cpp Sat Oct 14 12:05:34 2017 +0000 11.3 @@ -6068,11 +6068,21 @@ 11.4 Node* n_start = array_element_address(n, intcon(0), n_elem); 11.5 Node* m_start = array_element_address(m, intcon(0), m_elem); 11.6 11.7 - Node* call = make_runtime_call(RC_LEAF, 11.8 - OptoRuntime::montgomeryMultiply_Type(), 11.9 - stubAddr, stubName, TypePtr::BOTTOM, 11.10 - a_start, b_start, n_start, len, inv, top(), 11.11 - m_start); 11.12 + Node* call = NULL; 11.13 + if (CCallingConventionRequiresIntsAsLongs) { 11.14 + Node* len_I2L = ConvI2L(len); 11.15 + call = make_runtime_call(RC_LEAF, 11.16 + OptoRuntime::montgomeryMultiply_Type(), 11.17 + stubAddr, stubName, TypePtr::BOTTOM, 11.18 + a_start, b_start, n_start, len_I2L XTOP, inv, 11.19 + top(), m_start); 11.20 + } else { 11.21 + call = make_runtime_call(RC_LEAF, 11.22 + OptoRuntime::montgomeryMultiply_Type(), 11.23 + stubAddr, stubName, TypePtr::BOTTOM, 11.24 + a_start, b_start, n_start, len, inv, top(), 11.25 + m_start); 11.26 + } 11.27 set_result(m); 11.28 } 11.29 11.30 @@ -6122,11 +6132,22 @@ 11.31 Node* n_start = array_element_address(n, intcon(0), n_elem); 11.32 Node* m_start = array_element_address(m, intcon(0), m_elem); 11.33 11.34 - Node* call = make_runtime_call(RC_LEAF, 11.35 - OptoRuntime::montgomerySquare_Type(), 11.36 - stubAddr, stubName, TypePtr::BOTTOM, 11.37 - a_start, n_start, len, inv, top(), 11.38 - m_start); 11.39 + Node* call = NULL; 11.40 + if (CCallingConventionRequiresIntsAsLongs) { 11.41 + Node* len_I2L = ConvI2L(len); 11.42 + call = make_runtime_call(RC_LEAF, 11.43 + OptoRuntime::montgomerySquare_Type(), 11.44 + stubAddr, stubName, TypePtr::BOTTOM, 11.45 + a_start, n_start, len_I2L XTOP, inv, top(), 11.46 + m_start); 11.47 + } else { 11.48 + call = make_runtime_call(RC_LEAF, 11.49 + OptoRuntime::montgomerySquare_Type(), 11.50 + stubAddr, stubName, TypePtr::BOTTOM, 11.51 + a_start, n_start, len, inv, top(), 11.52 + m_start); 11.53 + } 11.54 + 11.55 set_result(m); 11.56 } 11.57
12.1 --- a/src/share/vm/opto/runtime.cpp Sat Oct 14 00:42:33 2017 -0700 12.2 +++ b/src/share/vm/opto/runtime.cpp Sat Oct 14 12:05:34 2017 +0000 12.3 @@ -1003,12 +1003,20 @@ 12.4 // create input type (domain) 12.5 int num_args = 7; 12.6 int argcnt = num_args; 12.7 + if (CCallingConventionRequiresIntsAsLongs) { 12.8 + argcnt++; // additional placeholder 12.9 + } 12.10 const Type** fields = TypeTuple::fields(argcnt); 12.11 int argp = TypeFunc::Parms; 12.12 fields[argp++] = TypePtr::NOTNULL; // a 12.13 fields[argp++] = TypePtr::NOTNULL; // b 12.14 fields[argp++] = TypePtr::NOTNULL; // n 12.15 - fields[argp++] = TypeInt::INT; // len 12.16 + if (CCallingConventionRequiresIntsAsLongs) { 12.17 + fields[argp++] = TypeLong::LONG; // len 12.18 + fields[argp++] = TypeLong::HALF; // placeholder 12.19 + } else { 12.20 + fields[argp++] = TypeInt::INT; // len 12.21 + } 12.22 fields[argp++] = TypeLong::LONG; // inv 12.23 fields[argp++] = Type::HALF; 12.24 fields[argp++] = TypePtr::NOTNULL; // result 12.25 @@ -1027,11 +1035,19 @@ 12.26 // create input type (domain) 12.27 int num_args = 6; 12.28 int argcnt = num_args; 12.29 + if (CCallingConventionRequiresIntsAsLongs) { 12.30 + argcnt++; // additional placeholder 12.31 + } 12.32 const Type** fields = TypeTuple::fields(argcnt); 12.33 int argp = TypeFunc::Parms; 12.34 fields[argp++] = TypePtr::NOTNULL; // a 12.35 fields[argp++] = TypePtr::NOTNULL; // n 12.36 - fields[argp++] = TypeInt::INT; // len 12.37 + if (CCallingConventionRequiresIntsAsLongs) { 12.38 + fields[argp++] = TypeLong::LONG; // len 12.39 + fields[argp++] = TypeLong::HALF; // placeholder 12.40 + } else { 12.41 + fields[argp++] = TypeInt::INT; // len 12.42 + } 12.43 fields[argp++] = TypeLong::LONG; // inv 12.44 fields[argp++] = Type::HALF; 12.45 fields[argp++] = TypePtr::NOTNULL; // result