jdk8-mips64-public/hotspot: changeset 9681:d690709cc339

     1.1 --- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Fri May 17 18:53:31 2019 +0100
     1.2 +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu May 23 03:59:28 2019 +0100
     1.3 @@ -1131,8 +1131,11 @@
     1.4      Register tmp3 = R8_ARG6;
     1.5      Register tmp4 = R9_ARG7;
     1.6  
     1.7 -
     1.8 -    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
     1.9 +    VectorSRegister tmp_vsr1  = VSR1;
    1.10 +    VectorSRegister tmp_vsr2  = VSR2;
    1.11 +
    1.12 +    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10;
    1.13 +
    1.14      // Don't try anything fancy if arrays don't have many elements.
    1.15      __ li(tmp3, 0);
    1.16      __ cmpwi(CCR0, R5_ARG3, 17);
    1.17 @@ -1186,6 +1189,8 @@
    1.18        __ andi_(R5_ARG3, R5_ARG3, 31);
    1.19        __ mtctr(tmp1);
    1.20  
    1.21 +     if (!VM_Version::has_vsx()) {
    1.22 +
    1.23        __ bind(l_8);
    1.24        // Use unrolled version for mass copying (copy 32 elements a time)
    1.25        // Load feeding store gets zero latency on Power6, however not on Power5.
    1.26 @@ -1201,7 +1206,44 @@
    1.27        __ addi(R3_ARG1, R3_ARG1, 32);
    1.28        __ addi(R4_ARG2, R4_ARG2, 32);
    1.29        __ bdnz(l_8);
    1.30 -    }
    1.31 +
    1.32 +    } else { // Processor supports VSX, so use it to mass copy.
    1.33 +
    1.34 +      // Prefetch the data into the L2 cache.
    1.35 +      __ dcbt(R3_ARG1, 0);
    1.36 +
    1.37 +      // If supported set DSCR pre-fetch to deepest.
    1.38 +      if (VM_Version::has_mfdscr()) {
    1.39 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
    1.40 +        __ mtdscr(tmp2);
    1.41 +      }
    1.42 +
    1.43 +      __ li(tmp1, 16);
    1.44 +
    1.45 +      // Backbranch target aligned to 32-byte. Not 16-byte align as
    1.46 +      // loop contains < 8 instructions that fit inside a single
    1.47 +      // i-cache sector.
    1.48 +      __ align(32);
    1.49 +
    1.50 +      __ bind(l_10);
    1.51 +      // Use loop with VSX load/store instructions to
    1.52 +      // copy 32 elements a time.
    1.53 +      __ lxvd2x(tmp_vsr1, 0, R3_ARG1);     // Load src
    1.54 +      __ stxvd2x(tmp_vsr1, 0, R4_ARG2);    // Store to dst
    1.55 +      __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
    1.56 +      __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
    1.57 +      __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
     1.58 +      __ addi(R4_ARG2, R4_ARG2, 32);       // Update dst+=32
    1.59 +      __ bdnz(l_10);                       // Dec CTR and loop if not zero.
    1.60 +
    1.61 +      // Restore DSCR pre-fetch value.
    1.62 +      if (VM_Version::has_mfdscr()) {
    1.63 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val);
    1.64 +        __ mtdscr(tmp2);
    1.65 +      }
    1.66 +
    1.67 +    } // VSX
    1.68 +   } // FasterArrayCopy
    1.69  
    1.70      __ bind(l_6);
    1.71  
    1.72 @@ -1570,7 +1612,11 @@
    1.73      Register tmp3 = R8_ARG6;
    1.74      Register tmp4 = R0;
    1.75  
    1.76 -    Label l_1, l_2, l_3, l_4, l_5, l_6;
    1.77 +    VectorSRegister tmp_vsr1  = VSR1;
    1.78 +    VectorSRegister tmp_vsr2  = VSR2;
    1.79 +
    1.80 +    Label l_1, l_2, l_3, l_4, l_5, l_6, l_7;
    1.81 +
    1.82      // for short arrays, just do single element copy
    1.83      __ li(tmp3, 0);
    1.84      __ cmpwi(CCR0, R5_ARG3, 5);
    1.85 @@ -1605,6 +1651,8 @@
    1.86        __ andi_(R5_ARG3, R5_ARG3, 7);
    1.87        __ mtctr(tmp1);
    1.88  
    1.89 +     if (!VM_Version::has_vsx()) {
    1.90 +
    1.91        __ bind(l_6);
    1.92        // Use unrolled version for mass copying (copy 8 elements a time).
    1.93        // Load feeding store gets zero latency on power6, however not on power 5.
    1.94 @@ -1620,7 +1668,44 @@
    1.95        __ addi(R3_ARG1, R3_ARG1, 32);
    1.96        __ addi(R4_ARG2, R4_ARG2, 32);
    1.97        __ bdnz(l_6);
    1.98 -    }
    1.99 +
   1.100 +    } else { // Processor supports VSX, so use it to mass copy.
   1.101 +
   1.102 +      // Prefetch the data into the L2 cache.
   1.103 +      __ dcbt(R3_ARG1, 0);
   1.104 +
   1.105 +      // If supported set DSCR pre-fetch to deepest.
   1.106 +      if (VM_Version::has_mfdscr()) {
   1.107 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
   1.108 +        __ mtdscr(tmp2);
   1.109 +      }
   1.110 +
   1.111 +      __ li(tmp1, 16);
   1.112 +
   1.113 +      // Backbranch target aligned to 32-byte. Not 16-byte align as
   1.114 +      // loop contains < 8 instructions that fit inside a single
   1.115 +      // i-cache sector.
   1.116 +      __ align(32);
   1.117 +
   1.118 +      __ bind(l_7);
   1.119 +      // Use loop with VSX load/store instructions to
   1.120 +      // copy 8 elements a time.
   1.121 +      __ lxvd2x(tmp_vsr1, 0, R3_ARG1);     // Load src
   1.122 +      __ stxvd2x(tmp_vsr1, 0, R4_ARG2);    // Store to dst
   1.123 +      __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
   1.124 +      __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
   1.125 +      __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
    1.126 +      __ addi(R4_ARG2, R4_ARG2, 32);       // Update dst+=32
   1.127 +      __ bdnz(l_7);                        // Dec CTR and loop if not zero.
   1.128 +
   1.129 +      // Restore DSCR pre-fetch value.
   1.130 +      if (VM_Version::has_mfdscr()) {
   1.131 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val);
   1.132 +        __ mtdscr(tmp2);
   1.133 +      }
   1.134 +
   1.135 +    } // VSX
   1.136 +   } // FasterArrayCopy
   1.137  
   1.138      // copy 1 element at a time
   1.139      __ bind(l_2);
   1.140 @@ -1772,7 +1857,10 @@
   1.141      Register tmp3 = R8_ARG6;
   1.142      Register tmp4 = R0;
   1.143  
   1.144 -    Label l_1, l_2, l_3, l_4;
   1.145 +    Label l_1, l_2, l_3, l_4, l_5;
   1.146 +
   1.147 +    VectorSRegister tmp_vsr1  = VSR1;
   1.148 +    VectorSRegister tmp_vsr2  = VSR2;
   1.149  
   1.150      { // FasterArrayCopy
   1.151        __ cmpwi(CCR0, R5_ARG3, 3);
   1.152 @@ -1782,6 +1870,7 @@
   1.153        __ andi_(R5_ARG3, R5_ARG3, 3);
   1.154        __ mtctr(tmp1);
   1.155  
   1.156 +    if (!VM_Version::has_vsx()) {
   1.157        __ bind(l_4);
   1.158        // Use unrolled version for mass copying (copy 4 elements a time).
   1.159        // Load feeding store gets zero latency on Power6, however not on Power5.
   1.160 @@ -1797,7 +1886,44 @@
   1.161        __ addi(R3_ARG1, R3_ARG1, 32);
   1.162        __ addi(R4_ARG2, R4_ARG2, 32);
   1.163        __ bdnz(l_4);
   1.164 -    }
   1.165 +
   1.166 +    } else { // Processor supports VSX, so use it to mass copy.
   1.167 +
   1.168 +      // Prefetch the data into the L2 cache.
   1.169 +      __ dcbt(R3_ARG1, 0);
   1.170 +
   1.171 +      // If supported set DSCR pre-fetch to deepest.
   1.172 +      if (VM_Version::has_mfdscr()) {
   1.173 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
   1.174 +        __ mtdscr(tmp2);
   1.175 +      }
   1.176 +
   1.177 +      __ li(tmp1, 16);
   1.178 +
   1.179 +      // Backbranch target aligned to 32-byte. Not 16-byte align as
   1.180 +      // loop contains < 8 instructions that fit inside a single
   1.181 +      // i-cache sector.
   1.182 +      __ align(32);
   1.183 +
   1.184 +      __ bind(l_5);
   1.185 +      // Use loop with VSX load/store instructions to
   1.186 +      // copy 4 elements a time.
   1.187 +      __ lxvd2x(tmp_vsr1, 0, R3_ARG1);     // Load src
   1.188 +      __ stxvd2x(tmp_vsr1, 0, R4_ARG2);    // Store to dst
   1.189 +      __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
   1.190 +      __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
   1.191 +      __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
    1.192 +      __ addi(R4_ARG2, R4_ARG2, 32);       // Update dst+=32
   1.193 +      __ bdnz(l_5);                        // Dec CTR and loop if not zero.
   1.194 +
   1.195 +      // Restore DSCR pre-fetch value.
   1.196 +      if (VM_Version::has_mfdscr()) {
   1.197 +        __ load_const_optimized(tmp2, VM_Version::_dscr_val);
   1.198 +        __ mtdscr(tmp2);
   1.199 +      }
   1.200 +
   1.201 +    } // VSX
   1.202 +   } // FasterArrayCopy
   1.203  
   1.204      // copy 1 element at a time
   1.205      __ bind(l_3);

     2.1 --- a/src/share/vm/prims/jni.cpp	Fri May 17 18:53:31 2019 +0100
     2.2 +++ b/src/share/vm/prims/jni.cpp	Thu May 23 03:59:28 2019 +0100
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
     2.6 + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
     2.7   * Copyright (c) 2012 Red Hat, Inc.
     2.8   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.9   *

Mercurial > jdk8-mips64-public > hotspot / changeset

changeset

Merge jdk8u222-b04