Merge

changeset:   8908:d1b47c83a9dd
parent:      8907:9f401c01775b
parent:      8906:584eac5794ff
child:       8909:e0c000e8eb75
author:      kevinw
date:        Sat, 14 Oct 2017 12:05:34 +0000

     1.1 --- a/.hgtags	Sat Oct 14 00:42:33 2017 -0700
     1.2 +++ b/.hgtags	Sat Oct 14 12:05:34 2017 +0000
     1.3 @@ -992,3 +992,4 @@
     1.4  0bd600d6d77b5b41780074bcbfa133032dadf657 jdk8u152-b04
     1.5  68758c5ab0c1ef01e89bea8a9b799714831a177f jdk8u152-b05
     1.6  7b96cfeed22242bb68a387d1680e602e37e48050 jdk8u162-b00
     1.7 +92693f9dd704467ddd5fbae5a5908c1713a08ee0 jdk8u162-b01
     2.1 --- a/src/cpu/ppc/vm/assembler_ppc.hpp	Sat Oct 14 00:42:33 2017 -0700
     2.2 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Sat Oct 14 12:05:34 2017 +0000
     2.3 @@ -1180,6 +1180,8 @@
     2.4    inline void mullw_( Register d, Register a, Register b);
     2.5    inline void mulhw(  Register d, Register a, Register b);
     2.6    inline void mulhw_( Register d, Register a, Register b);
     2.7 +  inline void mulhwu( Register d, Register a, Register b);
     2.8 +  inline void mulhwu_(Register d, Register a, Register b);
     2.9    inline void mulhd(  Register d, Register a, Register b);
    2.10    inline void mulhd_( Register d, Register a, Register b);
    2.11    inline void mulhdu( Register d, Register a, Register b);
     3.1 --- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Sat Oct 14 00:42:33 2017 -0700
     3.2 +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Sat Oct 14 12:05:34 2017 +0000
     3.3 @@ -109,6 +109,8 @@
     3.4  inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
     3.5  inline void Assembler::mulhw(  Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
     3.6  inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
     3.7 +inline void Assembler::mulhwu( Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
     3.8 +inline void Assembler::mulhwu_(Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }
     3.9  inline void Assembler::mulhd(  Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
    3.10  inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
    3.11  inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
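
The new emitters above add the unsigned counterpart of mulhw: mulhwu yields the upper 32 bits of the 64-bit product of two unsigned 32-bit operands, and mulhwu_ is the rc(1) form that also records the result in CR0. A minimal, illustrative C++ model of the value the instruction computes (not HotSpot code, just the arithmetic being encoded):

    #include <cassert>
    #include <cstdint>

    // Model of PPC "Multiply High Word Unsigned": the upper 32 bits of the
    // unsigned 64-bit product of two 32-bit operands (mulhw is the signed
    // counterpart).
    static uint32_t mulhwu_model(uint32_t a, uint32_t b) {
      return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32);
    }

    int main() {
      assert(mulhwu_model(0xFFFFFFFFu, 2u) == 1u);  // 0x1FFFFFFFE >> 32
      assert(mulhwu_model(0x80000000u, 0x80000000u) == 0x40000000u);
      return 0;
    }
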
     4.1 --- a/src/cpu/ppc/vm/c2_init_ppc.cpp	Sat Oct 14 00:42:33 2017 -0700
     4.2 +++ b/src/cpu/ppc/vm/c2_init_ppc.cpp	Sat Oct 14 12:05:34 2017 +0000
     4.3 @@ -45,4 +45,10 @@
     4.4        FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
     4.5      }
     4.6    }
     4.7 +
     4.8 +  if (OptimizeFill) {
     4.9 +    warning("OptimizeFill is not supported on this CPU.");
    4.10 +    FLAG_SET_DEFAULT(OptimizeFill, false);
    4.11 +  }
    4.12 +
    4.13  }
     5.1 --- a/src/cpu/ppc/vm/ppc.ad	Sat Oct 14 00:42:33 2017 -0700
     5.2 +++ b/src/cpu/ppc/vm/ppc.ad	Sat Oct 14 12:05:34 2017 +0000
     5.3 @@ -1,6 +1,6 @@
     5.4  //
     5.5  // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
     5.6 -// Copyright 2012, 2014 SAP AG. All rights reserved.
     5.7 +// Copyright (c) 2012, 2017 SAP SE. All rights reserved.
     5.8  // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5.9  //
    5.10  // This code is free software; you can redistribute it and/or modify it
    5.11 @@ -8610,6 +8610,44 @@
    5.12    ins_pipe(pipe_class_default);
    5.13  %}
    5.14  
    5.15 +// Bitfield Extract: URShiftI + AndI
    5.16 +instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
    5.17 +  match(Set dst (AndI (URShiftI src1 src2) src3));
    5.18 +
    5.19 +  format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
    5.20 +  size(4);
    5.21 +  ins_encode %{
    5.22 +    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    5.23 +    int rshift = ($src2$$constant) & 0x1f;
    5.24 +    int length = log2_long(((jlong) $src3$$constant) + 1);
    5.25 +    if (rshift + length > 32) {
    5.26 +      // if necessary, adjust mask to omit rotated bits.
    5.27 +      length = 32 - rshift;
    5.28 +    }
    5.29 +    __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
    5.30 +  %}
    5.31 +  ins_pipe(pipe_class_default);
    5.32 +%}
    5.33 +
    5.34 +// Bitfield Extract: URShiftL + AndL
    5.35 +instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
    5.36 +  match(Set dst (AndL (URShiftL src1 src2) src3));
    5.37 +
    5.38 +  format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
    5.39 +  size(4);
    5.40 +  ins_encode %{
    5.41 +    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    5.42 +    int rshift  = ($src2$$constant) & 0x3f;
    5.43 +    int length = log2_long(((jlong) $src3$$constant) + 1);
    5.44 +    if (rshift + length > 64) {
    5.45 +      // if necessary, adjust mask to omit rotated bits.
    5.46 +      length = 64 - rshift;
    5.47 +    }
    5.48 +    __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
    5.49 +  %}
    5.50 +  ins_pipe(pipe_class_default);
    5.51 +%}
    5.52 +
    5.53  instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
    5.54    match(Set dst (ConvL2I (ConvI2L src)));
    5.55  
    5.56 @@ -8889,6 +8927,19 @@
    5.57    ins_pipe(pipe_class_default);
    5.58  %}
    5.59  
    5.60 +// Left shifted Immediate And
    5.61 +instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
    5.62 +  match(Set dst (AndI src1 src2));
    5.63 +  effect(KILL cr0);
    5.64 +  format %{ "ANDIS   $dst, $src1, $src2.hi" %}
    5.65 +  size(4);
    5.66 +  ins_encode %{
    5.67 +    // TODO: PPC port $archOpcode(ppc64Opcode_andis_);
    5.68 +    __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
    5.69 +  %}
    5.70 +  ins_pipe(pipe_class_default);
    5.71 +%}
    5.72 +
    5.73  // Immediate And
    5.74  instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
    5.75    match(Set dst (AndI src1 src2));
    5.76 @@ -10571,6 +10622,17 @@
    5.77    ins_pipe(pipe_class_compare);
    5.78  %}
    5.79  
    5.80 +instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
    5.81 +  match(Set crx (CmpP src1 src2));
    5.82 +  format %{ "CMPLDI   $crx, $src1, $src2 \t// ptr" %}
    5.83 +  size(4);
    5.84 +  ins_encode %{
    5.85 +    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    5.86 +    __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
    5.87 +  %}
    5.88 +  ins_pipe(pipe_class_compare);
    5.89 +%}
    5.90 +
    5.91  // Used in postalloc expand.
    5.92  instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
    5.93    // This match rule prevents reordering of node before a safepoint.
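
The two bitfield-extract rules added above fold the pattern (x >>> shift) & (2^k - 1) into a single EXTRDI (a form of rldicl): the extract length is log2(mask + 1), and for the int variant the length is clipped when shift + length would exceed 32 so that bits rotated in from the upper half of the 64-bit register are not picked up. A small sketch of the arithmetic being matched; extrdi_model and the constants below are illustrative, not HotSpot code:

    #include <cassert>
    #include <cstdint>

    // Illustrative model of extrdi(dst, src, length, 64 - (rshift + length)):
    // take 'length' bits of 'src' starting 'rshift' bits above the LSB and
    // right-justify them, zeroing everything else.
    static uint64_t extrdi_model(uint64_t src, int length, int rshift) {
      uint64_t mask = (length >= 64) ? ~0ULL : ((1ULL << length) - 1);
      return (src >> rshift) & mask;
    }

    int main() {
      // (x >>> 5) & 0x7F: rshift = 5, length = log2(0x7F + 1) = 7, so the
      // matcher would emit extrdi(dst, src, 7, 64 - (5 + 7)).
      uint64_t x = 0x123456789abcdef0ULL;
      assert(extrdi_model(x, 7, 5) == ((x >> 5) & 0x7F));
      return 0;
    }
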
     6.1 --- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Sat Oct 14 00:42:33 2017 -0700
     6.2 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Sat Oct 14 12:05:34 2017 +0000
     6.3 @@ -42,6 +42,8 @@
     6.4  #include "opto/runtime.hpp"
     6.5  #endif
     6.6  
     6.7 +#include <alloca.h>
     6.8 +
     6.9  #define __ masm->
    6.10  
    6.11  #ifdef PRODUCT
    6.12 @@ -3268,3 +3270,245 @@
    6.13    return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
    6.14                                         oop_maps, true);
    6.15  }
    6.16 +
    6.17 +
    6.18 +//------------------------------Montgomery multiplication------------------------
    6.19 +//
    6.20 +
    6.21 +// Subtract 0:b from carry:a. Return carry.
    6.22 +static unsigned long
    6.23 +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
    6.24 +  long i = 0;
    6.25 +  unsigned long tmp, tmp2;
    6.26 +  __asm__ __volatile__ (
    6.27 +    "subfc  %[tmp], %[tmp], %[tmp]   \n" // pre-set CA
    6.28 +    "mtctr  %[len]                   \n"
    6.29 +    "0:                              \n"
    6.30 +    "ldx    %[tmp], %[i], %[a]       \n"
    6.31 +    "ldx    %[tmp2], %[i], %[b]      \n"
    6.32 +    "subfe  %[tmp], %[tmp2], %[tmp]  \n" // subtract extended
    6.33 +    "stdx   %[tmp], %[i], %[a]       \n"
    6.34 +    "addi   %[i], %[i], 8            \n"
    6.35 +    "bdnz   0b                       \n"
    6.36 +    "addme  %[tmp], %[carry]         \n" // carry + CA - 1
    6.37 +    : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
    6.38 +    : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
    6.39 +    : "ctr", "xer", "memory"
    6.40 +  );
    6.41 +  return tmp;
    6.42 +}
    6.43 +
    6.44 +// Multiply (unsigned) Long A by Long B, accumulating the double-
    6.45 +// length result into the accumulator formed of T0, T1, and T2.
    6.46 +inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
    6.47 +  unsigned long hi, lo;
    6.48 +  __asm__ __volatile__ (
    6.49 +    "mulld  %[lo], %[A], %[B]    \n"
    6.50 +    "mulhdu %[hi], %[A], %[B]    \n"
    6.51 +    "addc   %[T0], %[T0], %[lo]  \n"
    6.52 +    "adde   %[T1], %[T1], %[hi]  \n"
    6.53 +    "addze  %[T2], %[T2]         \n"
    6.54 +    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    6.55 +    : [A]"r"(A), [B]"r"(B)
    6.56 +    : "xer"
    6.57 +  );
    6.58 +}
    6.59 +
    6.60 +// As above, but add twice the double-length result into the
    6.61 +// accumulator.
    6.62 +inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
    6.63 +  unsigned long hi, lo;
    6.64 +  __asm__ __volatile__ (
    6.65 +    "mulld  %[lo], %[A], %[B]    \n"
    6.66 +    "mulhdu %[hi], %[A], %[B]    \n"
    6.67 +    "addc   %[T0], %[T0], %[lo]  \n"
    6.68 +    "adde   %[T1], %[T1], %[hi]  \n"
    6.69 +    "addze  %[T2], %[T2]         \n"
    6.70 +    "addc   %[T0], %[T0], %[lo]  \n"
    6.71 +    "adde   %[T1], %[T1], %[hi]  \n"
    6.72 +    "addze  %[T2], %[T2]         \n"
    6.73 +    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    6.74 +    : [A]"r"(A), [B]"r"(B)
    6.75 +    : "xer"
    6.76 +  );
    6.77 +}
    6.78 +
    6.79 +// Fast Montgomery multiplication. The derivation of the algorithm is
    6.80 +// in "A Cryptographic Library for the Motorola DSP56000,
    6.81 +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
    6.82 +static void
    6.83 +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
    6.84 +                    unsigned long m[], unsigned long inv, int len) {
    6.85 +  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
    6.86 +  int i;
    6.87 +
    6.88 +  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
    6.89 +
    6.90 +  for (i = 0; i < len; i++) {
    6.91 +    int j;
    6.92 +    for (j = 0; j < i; j++) {
    6.93 +      MACC(a[j], b[i-j], t0, t1, t2);
    6.94 +      MACC(m[j], n[i-j], t0, t1, t2);
    6.95 +    }
    6.96 +    MACC(a[i], b[0], t0, t1, t2);
    6.97 +    m[i] = t0 * inv;
    6.98 +    MACC(m[i], n[0], t0, t1, t2);
    6.99 +
   6.100 +    assert(t0 == 0, "broken Montgomery multiply");
   6.101 +
   6.102 +    t0 = t1; t1 = t2; t2 = 0;
   6.103 +  }
   6.104 +
   6.105 +  for (i = len; i < 2*len; i++) {
   6.106 +    int j;
   6.107 +    for (j = i-len+1; j < len; j++) {
   6.108 +      MACC(a[j], b[i-j], t0, t1, t2);
   6.109 +      MACC(m[j], n[i-j], t0, t1, t2);
   6.110 +    }
   6.111 +    m[i-len] = t0;
   6.112 +    t0 = t1; t1 = t2; t2 = 0;
   6.113 +  }
   6.114 +
   6.115 +  while (t0) {
   6.116 +    t0 = sub(m, n, t0, len);
   6.117 +  }
   6.118 +}
   6.119 +
   6.120 +// Fast Montgomery squaring. This uses asymptotically 25% fewer
   6.121 +// multiplies so it should be up to 25% faster than Montgomery
   6.122 +// multiplication. However, its loop control is more complex and it
   6.123 +// may actually run slower on some machines.
   6.124 +static void
   6.125 +montgomery_square(unsigned long a[], unsigned long n[],
   6.126 +                  unsigned long m[], unsigned long inv, int len) {
   6.127 +  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
   6.128 +  int i;
   6.129 +
   6.130 +  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
   6.131 +
   6.132 +  for (i = 0; i < len; i++) {
   6.133 +    int j;
   6.134 +    int end = (i+1)/2;
   6.135 +    for (j = 0; j < end; j++) {
   6.136 +      MACC2(a[j], a[i-j], t0, t1, t2);
   6.137 +      MACC(m[j], n[i-j], t0, t1, t2);
   6.138 +    }
   6.139 +    if ((i & 1) == 0) {
   6.140 +      MACC(a[j], a[j], t0, t1, t2);
   6.141 +    }
   6.142 +    for (; j < i; j++) {
   6.143 +      MACC(m[j], n[i-j], t0, t1, t2);
   6.144 +    }
   6.145 +    m[i] = t0 * inv;
   6.146 +    MACC(m[i], n[0], t0, t1, t2);
   6.147 +
   6.148 +    assert(t0 == 0, "broken Montgomery square");
   6.149 +
   6.150 +    t0 = t1; t1 = t2; t2 = 0;
   6.151 +  }
   6.152 +
   6.153 +  for (i = len; i < 2*len; i++) {
   6.154 +    int start = i-len+1;
   6.155 +    int end = start + (len - start)/2;
   6.156 +    int j;
   6.157 +    for (j = start; j < end; j++) {
   6.158 +      MACC2(a[j], a[i-j], t0, t1, t2);
   6.159 +      MACC(m[j], n[i-j], t0, t1, t2);
   6.160 +    }
   6.161 +    if ((i & 1) == 0) {
   6.162 +      MACC(a[j], a[j], t0, t1, t2);
   6.163 +    }
   6.164 +    for (; j < len; j++) {
   6.165 +      MACC(m[j], n[i-j], t0, t1, t2);
   6.166 +    }
   6.167 +    m[i-len] = t0;
   6.168 +    t0 = t1; t1 = t2; t2 = 0;
   6.169 +  }
   6.170 +
   6.171 +  while (t0) {
   6.172 +    t0 = sub(m, n, t0, len);
   6.173 +  }
   6.174 +}
   6.175 +
   6.176 +// The threshold at which squaring is advantageous was determined
   6.177 +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
   6.178 +// Doesn't seem to be relevant for Power8 so we use the same value.
   6.179 +#define MONTGOMERY_SQUARING_THRESHOLD 64
   6.180 +
   6.181 +// Copy len longwords from s to d, word-swapping as we go. The
   6.182 +// destination array is reversed.
   6.183 +static void reverse_words(unsigned long *s, unsigned long *d, int len) {
   6.184 +  d += len;
   6.185 +  while(len-- > 0) {
   6.186 +    d--;
   6.187 +    unsigned long s_val = *s;
   6.188 +    // Swap words in a longword on little endian machines.
   6.189 +#ifdef VM_LITTLE_ENDIAN
   6.190 +     s_val = (s_val << 32) | (s_val >> 32);
   6.191 +#endif
   6.192 +    *d = s_val;
   6.193 +    s++;
   6.194 +  }
   6.195 +}
   6.196 +
   6.197 +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
   6.198 +                                        jint len, jlong inv,
   6.199 +                                        jint *m_ints) {
   6.200 +  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
   6.201 +  int longwords = len/2;
   6.202 +  assert(longwords > 0, "unsupported");
   6.203 +
   6.204 +  // Make very sure we don't use so much space that the stack might
   6.205 +  // overflow. 512 jints corresponds to an 16384-bit integer and
   6.206 +  // will use here a total of 8k bytes of stack space.
   6.207 +  int total_allocation = longwords * sizeof (unsigned long) * 4;
   6.208 +  guarantee(total_allocation <= 8192, "must be");
   6.209 +  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
   6.210 +
   6.211 +  // Local scratch arrays
   6.212 +  unsigned long
   6.213 +    *a = scratch + 0 * longwords,
   6.214 +    *b = scratch + 1 * longwords,
   6.215 +    *n = scratch + 2 * longwords,
   6.216 +    *m = scratch + 3 * longwords;
   6.217 +
   6.218 +  reverse_words((unsigned long *)a_ints, a, longwords);
   6.219 +  reverse_words((unsigned long *)b_ints, b, longwords);
   6.220 +  reverse_words((unsigned long *)n_ints, n, longwords);
   6.221 +
   6.222 +  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
   6.223 +
   6.224 +  reverse_words(m, (unsigned long *)m_ints, longwords);
   6.225 +}
   6.226 +
   6.227 +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
   6.228 +                                      jint len, jlong inv,
   6.229 +                                      jint *m_ints) {
   6.230 +  assert(len % 2 == 0, "array length in montgomery_square must be even");
   6.231 +  int longwords = len/2;
   6.232 +  assert(longwords > 0, "unsupported");
   6.233 +
   6.234 +  // Make very sure we don't use so much space that the stack might
   6.235 +  // overflow. 512 jints corresponds to an 16384-bit integer and
   6.236 +  // will use here a total of 6k bytes of stack space.
   6.237 +  int total_allocation = longwords * sizeof (unsigned long) * 3;
   6.238 +  guarantee(total_allocation <= 8192, "must be");
   6.239 +  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
   6.240 +
   6.241 +  // Local scratch arrays
   6.242 +  unsigned long
   6.243 +    *a = scratch + 0 * longwords,
   6.244 +    *n = scratch + 1 * longwords,
   6.245 +    *m = scratch + 2 * longwords;
   6.246 +
   6.247 +  reverse_words((unsigned long *)a_ints, a, longwords);
   6.248 +  reverse_words((unsigned long *)n_ints, n, longwords);
   6.249 +
   6.250 +  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
   6.251 +    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
   6.252 +  } else {
   6.253 +    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
   6.254 +  }
   6.255 +
   6.256 +  reverse_words(m, (unsigned long *)m_ints, longwords);
   6.257 +}
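
For context on the code above: this is the word-serial Montgomery multiplication from Dusse and Kaliski, as already used by other ports. A three-word accumulator (t0, t1, t2) collects one output column at a time and is shifted right one word per iteration; MACC/MACC2 are the PPC inline-assembly multiply-accumulate steps, reverse_words reverses the longword order of the operands (swapping the halves of each longword on little-endian machines), and the final while loop performs the conditional subtractions. The structure is easier to see without the inline assembly; below is a minimal reference sketch that uses 32-bit limbs so plain uint64_t arithmetic covers the partial products. It is an illustration under those assumptions, not the HotSpot implementation:

    #include <cassert>
    #include <cstdint>

    // Reference sketch with 32-bit limbs (the code above uses 64-bit limbs).
    // 32-bit analogue of MACC: accumulate a*b into (t0, t1, t2).
    static inline void macc(uint32_t a, uint32_t b,
                            uint32_t &t0, uint32_t &t1, uint32_t &t2) {
      uint64_t p = (uint64_t)a * b;
      uint64_t s = (uint64_t)t0 + (uint32_t)p;             // add low product word
      t0 = (uint32_t)s;
      s = (uint64_t)t1 + (uint32_t)(p >> 32) + (s >> 32);  // high word plus carry
      t1 = (uint32_t)s;
      t2 += (uint32_t)(s >> 32);                           // final carry
    }

    // Subtract n from (carry:m) in place and return the new top word,
    // mirroring sub() above.
    static uint32_t sub_ref(uint32_t m[], const uint32_t n[],
                            uint32_t carry, int len) {
      uint32_t borrow = 0;
      for (int i = 0; i < len; i++) {
        uint64_t d = (uint64_t)m[i] - n[i] - borrow;
        m[i] = (uint32_t)d;
        borrow = (uint32_t)(d >> 63);                      // 1 if it wrapped
      }
      return carry - borrow;
    }

    // -n0^-1 mod 2^32 for odd n0 (Newton iteration on the inverse).
    static uint32_t inv_word(uint32_t n0) {
      uint32_t x = n0;                                     // correct to 3 bits
      for (int i = 0; i < 5; i++) x *= 2 - n0 * x;         // doubles the precision
      return 0u - x;
    }

    // m = a * b * R^-1 (mod n) with R = 2^(32*len); limbs are little-endian.
    static void montgomery_multiply_ref(const uint32_t a[], const uint32_t b[],
                                        const uint32_t n[], uint32_t m[],
                                        uint32_t inv, int len) {
      uint32_t t0 = 0, t1 = 0, t2 = 0;
      for (int i = 0; i < len; i++) {
        for (int j = 0; j < i; j++) {
          macc(a[j], b[i - j], t0, t1, t2);
          macc(m[j], n[i - j], t0, t1, t2);
        }
        macc(a[i], b[0], t0, t1, t2);
        m[i] = t0 * inv;                     // chosen so the low word cancels
        macc(m[i], n[0], t0, t1, t2);        // t0 is now zero
        t0 = t1; t1 = t2; t2 = 0;            // shift accumulator one word right
      }
      for (int i = len; i < 2 * len; i++) {
        for (int j = i - len + 1; j < len; j++) {
          macc(a[j], b[i - j], t0, t1, t2);
          macc(m[j], n[i - j], t0, t1, t2);
        }
        m[i - len] = t0;
        t0 = t1; t1 = t2; t2 = 0;
      }
      while (t0) {                           // conditional final subtraction(s)
        t0 = sub_ref(m, n, t0, len);
      }
    }

    int main() {
      // One-limb smoke test: the result satisfies m * 2^32 == a*b (mod n).
      uint32_t n[1] = { 0xFFFFFFFBu };       // any odd modulus
      uint32_t a[1] = { 123456789u }, b[1] = { 987654321u }, m[1];
      montgomery_multiply_ref(a, b, n, m, inv_word(n[0]), 1);
      assert(((uint64_t)m[0] << 32) % n[0] == ((uint64_t)a[0] * b[0]) % n[0]);
      return 0;
    }
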
     7.1 --- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Sat Oct 14 00:42:33 2017 -0700
     7.2 +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Sat Oct 14 12:05:34 2017 +0000
     7.3 @@ -2524,6 +2524,14 @@
     7.4        StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
     7.5      }
     7.6  
     7.7 +    if (UseMontgomeryMultiplyIntrinsic) {
     7.8 +      StubRoutines::_montgomeryMultiply
     7.9 +        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    7.10 +    }
    7.11 +    if (UseMontgomerySquareIntrinsic) {
    7.12 +      StubRoutines::_montgomerySquare
    7.13 +        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    7.14 +    }
    7.15    }
    7.16  
    7.17   public:
     8.1 --- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Sat Oct 14 00:42:33 2017 -0700
     8.2 +++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Sat Oct 14 12:05:34 2017 +0000
     8.3 @@ -265,7 +265,7 @@
     8.4        __ cmpdi(CCR0, Rmdo, 0);
     8.5        __ beq(CCR0, no_mdo);
     8.6  
     8.7 -      // Increment backedge counter in the MDO.
     8.8 +      // Increment invocation counter in the MDO.
     8.9        const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
    8.10        __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
    8.11        __ addi(Rscratch2, Rscratch2, increment);
    8.12 @@ -277,12 +277,12 @@
    8.13      }
    8.14  
    8.15      // Increment counter in MethodCounters*.
    8.16 -    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
    8.17 +    const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
    8.18      __ bind(no_mdo);
    8.19      __ get_method_counters(R19_method, R3_counters, done);
    8.20 -    __ lwz(Rscratch2, mo_bc_offs, R3_counters);
    8.21 +    __ lwz(Rscratch2, mo_ic_offs, R3_counters);
    8.22      __ addi(Rscratch2, Rscratch2, increment);
    8.23 -    __ stw(Rscratch2, mo_bc_offs, R3_counters);
    8.24 +    __ stw(Rscratch2, mo_ic_offs, R3_counters);
    8.25      __ load_const_optimized(Rscratch1, mask, R0);
    8.26      __ and_(Rscratch1, Rscratch2, Rscratch1);
    8.27      __ beq(CCR0, *overflow);
     9.1 --- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Sat Oct 14 00:42:33 2017 -0700
     9.2 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Sat Oct 14 12:05:34 2017 +0000
     9.3 @@ -201,6 +201,12 @@
     9.4      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
     9.5    }
     9.6  
     9.7 +  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
     9.8 +    UseMontgomeryMultiplyIntrinsic = true;
     9.9 +  }
    9.10 +  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    9.11 +    UseMontgomerySquareIntrinsic = true;
    9.12 +  }
    9.13  }
    9.14  
    9.15  void VM_Version::print_features() {
    10.1 --- a/src/os/windows/vm/version.rc	Sat Oct 14 00:42:33 2017 -0700
    10.2 +++ b/src/os/windows/vm/version.rc	Sat Oct 14 12:05:34 2017 +0000
    10.3 @@ -36,7 +36,7 @@
    10.4  //
    10.5  
    10.6  VS_VERSION_INFO VERSIONINFO
    10.7 - FILEVERSION    HS_VER
    10.8 + FILEVERSION    JDK_VER
    10.9   PRODUCTVERSION JDK_VER
   10.10   FILEFLAGSMASK 0x3fL
   10.11  #ifdef _DEBUG
   10.12 @@ -56,7 +56,7 @@
   10.13          BEGIN
   10.14              VALUE "CompanyName",      XSTR(HS_COMPANY)       "\0"
   10.15              VALUE "FileDescription",  XSTR(HS_FILEDESC)      "\0"
   10.16 -            VALUE "FileVersion",      XSTR(HS_DOTVER)        "\0"
   10.17 +            VALUE "FileVersion",      XSTR(JDK_DOTVER)        "\0"
   10.18              VALUE "Full Version",     XSTR(HS_BUILD_ID)      "\0"
   10.19  	    VALUE "InternalName",     XSTR(HS_INTERNAL_NAME) "\0"
   10.20              VALUE "LegalCopyright",   XSTR(HS_COPYRIGHT)     "\0"
    11.1 --- a/src/share/vm/opto/library_call.cpp	Sat Oct 14 00:42:33 2017 -0700
    11.2 +++ b/src/share/vm/opto/library_call.cpp	Sat Oct 14 12:05:34 2017 +0000
    11.3 @@ -6068,11 +6068,21 @@
    11.4      Node* n_start = array_element_address(n, intcon(0), n_elem);
    11.5      Node* m_start = array_element_address(m, intcon(0), m_elem);
    11.6  
    11.7 -    Node* call = make_runtime_call(RC_LEAF,
    11.8 -                                   OptoRuntime::montgomeryMultiply_Type(),
    11.9 -                                   stubAddr, stubName, TypePtr::BOTTOM,
   11.10 -                                   a_start, b_start, n_start, len, inv, top(),
   11.11 -                                   m_start);
   11.12 +    Node* call = NULL;
   11.13 +    if (CCallingConventionRequiresIntsAsLongs) {
   11.14 +      Node* len_I2L = ConvI2L(len);
   11.15 +      call = make_runtime_call(RC_LEAF,
   11.16 +                               OptoRuntime::montgomeryMultiply_Type(),
   11.17 +                               stubAddr, stubName, TypePtr::BOTTOM,
   11.18 +                               a_start, b_start, n_start, len_I2L XTOP, inv,
   11.19 +                               top(), m_start);
   11.20 +    } else {
   11.21 +      call = make_runtime_call(RC_LEAF,
   11.22 +                               OptoRuntime::montgomeryMultiply_Type(),
   11.23 +                               stubAddr, stubName, TypePtr::BOTTOM,
   11.24 +                               a_start, b_start, n_start, len, inv, top(),
   11.25 +                               m_start);
   11.26 +    }
   11.27      set_result(m);
   11.28    }
   11.29  
   11.30 @@ -6122,11 +6132,22 @@
   11.31      Node* n_start = array_element_address(n, intcon(0), n_elem);
   11.32      Node* m_start = array_element_address(m, intcon(0), m_elem);
   11.33  
   11.34 -    Node* call = make_runtime_call(RC_LEAF,
   11.35 -                                   OptoRuntime::montgomerySquare_Type(),
   11.36 -                                   stubAddr, stubName, TypePtr::BOTTOM,
   11.37 -                                   a_start, n_start, len, inv, top(),
   11.38 -                                   m_start);
   11.39 +    Node* call = NULL;
   11.40 +    if (CCallingConventionRequiresIntsAsLongs) {
   11.41 +      Node* len_I2L = ConvI2L(len);
   11.42 +      call = make_runtime_call(RC_LEAF,
   11.43 +                               OptoRuntime::montgomerySquare_Type(),
   11.44 +                               stubAddr, stubName, TypePtr::BOTTOM,
   11.45 +                               a_start, n_start, len_I2L XTOP, inv, top(),
   11.46 +                               m_start);
   11.47 +    } else {
   11.48 +      call = make_runtime_call(RC_LEAF,
   11.49 +                               OptoRuntime::montgomerySquare_Type(),
   11.50 +                               stubAddr, stubName, TypePtr::BOTTOM,
   11.51 +                               a_start, n_start, len, inv, top(),
   11.52 +                               m_start);
   11.53 +    }
   11.54 +
   11.55      set_result(m);
   11.56    }
   11.57  
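
On targets where CCallingConventionRequiresIntsAsLongs is set (the PPC64 ABIs), the jint len is widened with ConvI2L and passed to the leaf call as a full long, which occupies two argument slots in C2's type system; the matching TypeLong::HALF placeholder is added to the call signatures in runtime.cpp below. XTOP is the pre-existing helper macro in library_call.cpp that supplies the second (high-half) slot on 64-bit builds; its definition is roughly the following (the exact spelling in the file may differ slightly):

    #ifdef _LP64
    #define XTOP ,top() /* additional high-half argument */
    #else
    #define XTOP        /* no additional argument */
    #endif
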
    12.1 --- a/src/share/vm/opto/runtime.cpp	Sat Oct 14 00:42:33 2017 -0700
    12.2 +++ b/src/share/vm/opto/runtime.cpp	Sat Oct 14 12:05:34 2017 +0000
    12.3 @@ -1003,12 +1003,20 @@
    12.4    // create input type (domain)
    12.5    int num_args      = 7;
    12.6    int argcnt = num_args;
    12.7 +  if (CCallingConventionRequiresIntsAsLongs) {
    12.8 +    argcnt++;                           // additional placeholder
    12.9 +  }
   12.10    const Type** fields = TypeTuple::fields(argcnt);
   12.11    int argp = TypeFunc::Parms;
   12.12    fields[argp++] = TypePtr::NOTNULL;    // a
   12.13    fields[argp++] = TypePtr::NOTNULL;    // b
   12.14    fields[argp++] = TypePtr::NOTNULL;    // n
   12.15 -  fields[argp++] = TypeInt::INT;        // len
   12.16 +  if (CCallingConventionRequiresIntsAsLongs) {
   12.17 +    fields[argp++] = TypeLong::LONG;    // len
   12.18 +    fields[argp++] = TypeLong::HALF;    // placeholder
   12.19 +  } else {
   12.20 +    fields[argp++] = TypeInt::INT;      // len
   12.21 +  }
   12.22    fields[argp++] = TypeLong::LONG;      // inv
   12.23    fields[argp++] = Type::HALF;
   12.24    fields[argp++] = TypePtr::NOTNULL;    // result
   12.25 @@ -1027,11 +1035,19 @@
   12.26    // create input type (domain)
   12.27    int num_args      = 6;
   12.28    int argcnt = num_args;
   12.29 +  if (CCallingConventionRequiresIntsAsLongs) {
   12.30 +    argcnt++;                           // additional placeholder
   12.31 +  }
   12.32    const Type** fields = TypeTuple::fields(argcnt);
   12.33    int argp = TypeFunc::Parms;
   12.34    fields[argp++] = TypePtr::NOTNULL;    // a
   12.35    fields[argp++] = TypePtr::NOTNULL;    // n
   12.36 -  fields[argp++] = TypeInt::INT;        // len
   12.37 +  if (CCallingConventionRequiresIntsAsLongs) {
   12.38 +    fields[argp++] = TypeLong::LONG;    // len
   12.39 +    fields[argp++] = TypeLong::HALF;    // placeholder
   12.40 +  } else {
   12.41 +    fields[argp++] = TypeInt::INT;      // len
   12.42 +  }
   12.43    fields[argp++] = TypeLong::LONG;      // inv
   12.44    fields[argp++] = Type::HALF;
   12.45    fields[argp++] = TypePtr::NOTNULL;    // result
