src/cpu/sparc/vm/vm_version_sparc.cpp

changeset 0
f90c822e73f8
child 6876
710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,402 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "asm/macroAssembler.inline.hpp"
    1.30 +#include "memory/resourceArea.hpp"
    1.31 +#include "runtime/java.hpp"
    1.32 +#include "runtime/stubCodeGenerator.hpp"
    1.33 +#include "vm_version_sparc.hpp"
    1.34 +#ifdef TARGET_OS_FAMILY_linux
    1.35 +# include "os_linux.inline.hpp"
    1.36 +#endif
    1.37 +#ifdef TARGET_OS_FAMILY_solaris
    1.38 +# include "os_solaris.inline.hpp"
    1.39 +#endif
    1.40 +// Static state of VM_Version; both members are assigned again in VM_Version::initialize().
    1.41 +int VM_Version::_features = VM_Version::unknown_m;
    1.42 +const char* VM_Version::_features_str = "";
    1.43 +// Detects CPU features, then validates and tunes prefetch, GC, alignment, VIS and AES flags; also builds _features_str.
    1.44 +void VM_Version::initialize() {
    1.45 +  _features = determine_features();
    1.46 +  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
    1.47 +  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
    1.48 +  PrefetchFieldsAhead         = prefetch_fields_ahead();
    1.49 +  // Only 0 and 1 are accepted: the assert reports anything else, the two ifs reset it to 0.
    1.50 +  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value");
    1.51 +  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
    1.52 +  if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0;
    1.53 +
    1.54 +  // Allocation prefetch settings: the step size is raised to at least prefetch_data_size().
    1.55 +  intx cache_line_size = prefetch_data_size();
    1.56 +  if( cache_line_size > AllocatePrefetchStepSize )
    1.57 +    AllocatePrefetchStepSize = cache_line_size;
    1.58 +  // Both line counts must be at least 1; smaller values fall back to 3 and 1 respectively.
    1.59 +  assert(AllocatePrefetchLines > 0, "invalid value");
    1.60 +  if( AllocatePrefetchLines < 1 )     // set valid value in product VM
    1.61 +    AllocatePrefetchLines = 3;
    1.62 +  assert(AllocateInstancePrefetchLines > 0, "invalid value");
    1.63 +  if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
    1.64 +    AllocateInstancePrefetchLines = 1;
    1.65 +  // Distance and style are overwritten with the values returned by the two helpers.
    1.66 +  AllocatePrefetchDistance = allocate_prefetch_distance();
    1.67 +  AllocatePrefetchStyle    = allocate_prefetch_style();
    1.68 +  // The distance must be a positive multiple of the step size; otherwise it becomes one step.
    1.69 +  assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 &&
    1.70 +         (AllocatePrefetchDistance > 0), "invalid value");
    1.71 +  if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 ||
    1.72 +      (AllocatePrefetchDistance <= 0)) {
    1.73 +    AllocatePrefetchDistance = AllocatePrefetchStepSize;
    1.74 +  }
    1.75 +  // Style 3 is only kept when has_blk_init() holds; otherwise warn and fall back to style 1.
    1.76 +  if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
    1.77 +    warning("BIS instructions are not available on this CPU");
    1.78 +    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
    1.79 +  }
    1.80 +
    1.81 +  guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
    1.82 +  // Arraycopy prefetch distances must stay below 4096; larger values are replaced by 4064.
    1.83 +  assert(ArraycopySrcPrefetchDistance < 4096, "invalid value");
    1.84 +  if (ArraycopySrcPrefetchDistance >= 4096)
    1.85 +    ArraycopySrcPrefetchDistance = 4064;
    1.86 +  assert(ArraycopyDstPrefetchDistance < 4096, "invalid value");
    1.87 +  if (ArraycopyDstPrefetchDistance >= 4096)
    1.88 +    ArraycopyDstPrefetchDistance = 4064;
    1.89 +
    1.90 +  UseSSE = 0; // Only on x86 and x64
    1.91 +
    1.92 +  _supports_cx8 = has_v9();
    1.93 +  _supports_atomic_getset4 = true; // swap instruction
    1.94 +
    1.95 +  // There are Fujitsu Sparc64 CPUs which support blk_init as well so
    1.96 +  // we have to take this check out of the 'is_niagara()' block below.
    1.97 +  if (has_blk_init()) {
    1.98 +    // When using CMS or G1, we cannot use memset() in BOT updates
    1.99 +    // because the sun4v/CMT version in libc_psr uses BIS which
   1.100 +    // exposes "phantom zeros" to concurrent readers. See 6948537.
   1.101 +    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
   1.102 +      FLAG_SET_DEFAULT(UseMemSetInBOT, false);
   1.103 +    }
   1.104 +    // Issue a stern warning if the user has explicitly set
   1.105 +    // UseMemSetInBOT (it is known to cause issues), but allow
   1.106 +    // use for experimentation and debugging.
   1.107 +    if (UseConcMarkSweepGC || UseG1GC) {
   1.108 +      if (UseMemSetInBOT) {
   1.109 +        assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
   1.110 +        warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
   1.111 +                " on sun4v; please understand that you are using at your own risk!");
   1.112 +      }
   1.113 +    }
   1.114 +  }
   1.115 +  // Tuning that is applied only when is_niagara() holds.
   1.116 +  if (is_niagara()) {
   1.117 +    // Indirect branch is the same cost as direct
   1.118 +    if (FLAG_IS_DEFAULT(UseInlineCaches)) {
   1.119 +      FLAG_SET_DEFAULT(UseInlineCaches, false);
   1.120 +    }
   1.121 +    // Align loops on a single instruction boundary.
   1.122 +    if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
   1.123 +      FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
   1.124 +    }
   1.125 +#ifdef _LP64
   1.126 +    // 32-bit oops don't make sense for the 64-bit VM on sparc
   1.127 +    // since the 32-bit VM has the same registers and smaller objects.
   1.128 +    Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   1.129 +    Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
   1.130 +#endif // _LP64
   1.131 +#ifdef COMPILER2
   1.132 +    // Indirect branch is the same cost as direct
   1.133 +    if (FLAG_IS_DEFAULT(UseJumpTables)) {
   1.134 +      FLAG_SET_DEFAULT(UseJumpTables, true);
   1.135 +    }
   1.136 +    // Single-issue, so entry and loop tops are
   1.137 +    // aligned on a single instruction boundary
   1.138 +    if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
   1.139 +      FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
   1.140 +    }
   1.141 +    if (is_niagara_plus()) {
   1.142 +      if (has_blk_init() && UseTLAB &&
   1.143 +          FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
   1.144 +        // Use BIS instruction for TLAB allocation prefetch.
   1.145 +        FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
   1.146 +        if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
   1.147 +          FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
   1.148 +        }
   1.149 +        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
   1.150 +          // Use smaller prefetch distance with BIS
   1.151 +          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
   1.152 +        }
   1.153 +      }
   1.154 +      if (is_T4()) {
   1.155 +        // Double number of prefetched cache lines on T4
   1.156 +        // since L2 cache line size is smaller (32 bytes).
   1.157 +        if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
   1.158 +          FLAG_SET_ERGO(intx, AllocatePrefetchLines, AllocatePrefetchLines*2);
   1.159 +        }
   1.160 +        if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) {
   1.161 +          FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
   1.162 +        }
   1.163 +      }
   1.164 +      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
   1.165 +        // Use different prefetch distance without BIS
   1.166 +        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
   1.167 +      }
   1.168 +      if (AllocatePrefetchInstr == 1) {
   1.169 +        // Need a space at the end of TLAB for BIS since it
   1.170 +        // will fault when accessing memory outside of heap.
   1.171 +
   1.172 +        // +1 for rounding up to next cache line, +1 to be safe
   1.173 +        int lines = AllocatePrefetchLines + 2;
   1.174 +        int step_size = AllocatePrefetchStepSize;
   1.175 +        int distance = AllocatePrefetchDistance;
   1.176 +        _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
   1.177 +      }
   1.178 +    }
   1.179 +#endif
   1.180 +  }
   1.181 +
   1.182 +  // Use hardware population count instruction if available.
   1.183 +  if (has_hardware_popc()) {
   1.184 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
   1.185 +      FLAG_SET_DEFAULT(UsePopCountInstruction, true);
   1.186 +    }
   1.187 +  } else if (UsePopCountInstruction) {
   1.188 +    warning("POPC instruction is not available on this CPU");
   1.189 +    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   1.190 +  }
   1.191 +
   1.192 +  // T4 and newer Sparc cpus have new compare and branch instruction.
   1.193 +  if (has_cbcond()) {
   1.194 +    if (FLAG_IS_DEFAULT(UseCBCond)) {
   1.195 +      FLAG_SET_DEFAULT(UseCBCond, true);
   1.196 +    }
   1.197 +  } else if (UseCBCond) {
   1.198 +    warning("CBCOND instruction is not available on this CPU");
   1.199 +    FLAG_SET_DEFAULT(UseCBCond, false);
   1.200 +  }
   1.201 +  // Block zeroing defaults to on where has_block_zeroing() holds; elsewhere it is refused with a warning.
   1.202 +  assert(BlockZeroingLowLimit > 0, "invalid value");
   1.203 +  if (has_block_zeroing()) {
   1.204 +    if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
   1.205 +      FLAG_SET_DEFAULT(UseBlockZeroing, true);
   1.206 +    }
   1.207 +  } else if (UseBlockZeroing) {
   1.208 +    warning("BIS zeroing instructions are not available on this CPU");
   1.209 +    FLAG_SET_DEFAULT(UseBlockZeroing, false);
   1.210 +  }
   1.211 +  // UseBlockCopy is gated on the same has_block_zeroing() predicate as UseBlockZeroing above.
   1.212 +  assert(BlockCopyLowLimit > 0, "invalid value");
   1.213 +  if (has_block_zeroing()) { // has_blk_init() && is_T4(): core's local L2 cache
   1.214 +    if (FLAG_IS_DEFAULT(UseBlockCopy)) {
   1.215 +      FLAG_SET_DEFAULT(UseBlockCopy, true);
   1.216 +    }
   1.217 +  } else if (UseBlockCopy) {
   1.218 +    warning("BIS instructions are not available or expensive on this CPU");
   1.219 +    FLAG_SET_DEFAULT(UseBlockCopy, false);
   1.220 +  }
   1.221 +
   1.222 +#ifdef COMPILER2
   1.223 +  // T4 and newer Sparc cpus have fast RDPC.
   1.224 +  if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
   1.225 +    FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true);
   1.226 +  }
   1.227 +
   1.228 +  // Currently not supported anywhere.
   1.229 +  FLAG_SET_DEFAULT(UseFPUForSpilling, false);
   1.230 +
   1.231 +  MaxVectorSize = 8;
   1.232 +
   1.233 +  assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
   1.234 +#endif
   1.235 +
   1.236 +  assert((CodeEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
   1.237 +  assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
   1.238 +  // Build the feature description: fifteen optional ", name" fragments, one per %s in the format.
   1.239 +  char buf[512];
   1.240 +  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
   1.241 +               (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
   1.242 +               (has_hardware_popc() ? ", popc" : ""),
   1.243 +               (has_vis1() ? ", vis1" : ""),
   1.244 +               (has_vis2() ? ", vis2" : ""),
   1.245 +               (has_vis3() ? ", vis3" : ""),
   1.246 +               (has_blk_init() ? ", blk_init" : ""),
   1.247 +               (has_cbcond() ? ", cbcond" : ""),
   1.248 +               (has_aes() ? ", aes" : ""),
   1.249 +               (is_ultra3() ? ", ultra3" : ""),
   1.250 +               (is_sun4v() ? ", sun4v" : ""),
   1.251 +               (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
   1.252 +               (is_sparc64() ? ", sparc64" : ""),
   1.253 +               (!has_hardware_mul32() ? ", no-mul32" : ""),
   1.254 +               (!has_hardware_div32() ? ", no-div32" : ""),
   1.255 +               (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
   1.256 +
   1.257 +  // buf either starts with ", " or is empty; skip that leading separator and keep a strdup'ed copy
   1.258 +  _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf);
   1.259 +
   1.260 +  // UseVIS is set to the smallest of what hardware supports and what
   1.261 +  // the command line requires.  I.e., you cannot set UseVIS to 3 on
   1.262 +  // older UltraSparc which do not support it.
   1.263 +  if (UseVIS > 3) UseVIS=3;
   1.264 +  if (UseVIS < 0) UseVIS=0;
   1.265 +  if (!has_vis3()) // Drop to 2 if no VIS3 support
   1.266 +    UseVIS = MIN2((intx)2,UseVIS);
   1.267 +  if (!has_vis2()) // Drop to 1 if no VIS2 support
   1.268 +    UseVIS = MIN2((intx)1,UseVIS);
   1.269 +  if (!has_vis1()) // Drop to 0 if no VIS1 support
   1.270 +    UseVIS = 0;
   1.271 +
   1.272 +  // SPARC T4 and above should have support for AES instructions
   1.273 +  if (has_aes()) {
   1.274 +    if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3
   1.275 +      if (FLAG_IS_DEFAULT(UseAES)) {
   1.276 +        FLAG_SET_DEFAULT(UseAES, true);
   1.277 +      }
   1.278 +      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
   1.279 +        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
   1.280 +      }
   1.281 +      // we disable both the AES flags if either of them is disabled on the command line
   1.282 +      if (!UseAES || !UseAESIntrinsics) {
   1.283 +        FLAG_SET_DEFAULT(UseAES, false);
   1.284 +        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   1.285 +      }
   1.286 +    } else {
   1.287 +        if (UseAES || UseAESIntrinsics) {
   1.288 +          warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
   1.289 +          if (UseAES) {
   1.290 +            FLAG_SET_DEFAULT(UseAES, false);
   1.291 +          }
   1.292 +          if (UseAESIntrinsics) {
   1.293 +            FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   1.294 +          }
   1.295 +        }
   1.296 +    }
   1.297 +  } else if (UseAES || UseAESIntrinsics) {
   1.298 +    warning("AES instructions are not available on this CPU");
   1.299 +    if (UseAES) {
   1.300 +      FLAG_SET_DEFAULT(UseAES, false);
   1.301 +    }
   1.302 +    if (UseAESIntrinsics) {
   1.303 +      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   1.304 +    }
   1.305 +  }
   1.306 +  // A default ContendedPaddingWidth is widened to cache_line_size (prefetch_data_size() above) when narrower.
   1.307 +  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
   1.308 +    (cache_line_size > ContendedPaddingWidth))
   1.309 +    ContendedPaddingWidth = cache_line_size;
   1.310 +  // Non-PRODUCT builds only: report the resulting settings when PrintMiscellaneous && Verbose.
   1.311 +#ifndef PRODUCT
   1.312 +  if (PrintMiscellaneous && Verbose) {
   1.313 +    tty->print("Allocation");
   1.314 +    if (AllocatePrefetchStyle <= 0) {
   1.315 +      tty->print_cr(": no prefetching");
   1.316 +    } else {
   1.317 +      tty->print(" prefetching: ");
   1.318 +      if (AllocatePrefetchInstr == 0) {
   1.319 +          tty->print("PREFETCH");
   1.320 +      } else if (AllocatePrefetchInstr == 1) {
   1.321 +          tty->print("BIS");
   1.322 +      }
   1.323 +      if (AllocatePrefetchLines > 1) {
   1.324 +        tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
   1.325 +      } else {
   1.326 +        tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
   1.327 +      }
   1.328 +    }
   1.329 +    if (PrefetchCopyIntervalInBytes > 0) {
   1.330 +      tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
   1.331 +    }
   1.332 +    if (PrefetchScanIntervalInBytes > 0) {
   1.333 +      tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
   1.334 +    }
   1.335 +    if (PrefetchFieldsAhead > 0) {
   1.336 +      tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
   1.337 +    }
   1.338 +    if (ContendedPaddingWidth > 0) {
   1.339 +      tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
   1.340 +    }
   1.341 +  }
   1.342 +#endif // PRODUCT
   1.343 +}
   1.344 +// Writes "Version:" followed by the cpu_features() string to tty via print_cr().
   1.345 +void VM_Version::print_features() {
   1.346 +  tty->print_cr("Version:%s", cpu_features());
   1.347 +}
   1.348 +// Computes the CPU feature mask, honoring the UseV8InstrsOnly and UseNiagaraInstrs flags.
   1.349 +int VM_Version::determine_features() {
   1.350 +  // Forced V8 mode returns before any platform detection is attempted.
   1.351 +  if (UseV8InstrsOnly) {
   1.352 +    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");)
   1.353 +    return generic_v8_m;
   1.354 +  }
   1.355 +
   1.356 +  // platform_features() is os_arch specific; fall back to generic V9
   1.357 +  // when its result is still unknown_m.
   1.358 +  int detected = platform_features(unknown_m);
   1.359 +  if (detected == unknown_m) {
   1.360 +    detected = generic_v9_m;
   1.361 +    warning("Cannot recognize SPARC version. Default to V9");
   1.362 +  }
   1.363 +
   1.364 +  assert(is_T_family(detected) == is_niagara(detected), "Niagara should be T series");
   1.365 +
   1.366 +  // Only two combinations alter the mask; in every other case the
   1.367 +  // detected features already agree with UseNiagaraInstrs.
   1.368 +  if (UseNiagaraInstrs) {
   1.369 +    if (!is_T_family(detected)) {
   1.370 +      // Force code generation for Niagara on a CPU not detected as T family.
   1.371 +      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");)
   1.372 +      detected |= T_family_m;
   1.373 +    }
   1.374 +  } else if (is_T_family(detected) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) {
   1.375 +    // UseNiagaraInstrs is off and not at its default: drop the T family bits.
   1.376 +    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");)
   1.377 +    detected &= ~(T_family_m | T1_model_m);
   1.378 +  }
   1.379 +  return detected;
   1.380 +}
   1.381 +// File-local backup of _features: written by allow_all(), read back by revert().
   1.382 +static int saved_features = 0;
   1.383 +// Stores the current _features in saved_features, then sets _features to all_features_m.
   1.384 +void VM_Version::allow_all() {
   1.385 +  saved_features = _features; // single slot: a second call before revert() overwrites the backup
   1.386 +  _features      = all_features_m;
   1.387 +}
   1.388 +// Copies saved_features back into _features (saved_features is 0 until allow_all() has run).
   1.389 +void VM_Version::revert() {
   1.390 +  _features = saved_features;
   1.391 +}
   1.392 +// Returns the gc worker thread count from nof_parallel_worker_threads() with per-CPU-family arguments.
   1.393 +unsigned int VM_Version::calc_parallel_worker_threads() {
   1.394 +  // For now M-series uses the same gc thread calculation as niagara-plus;
   1.395 +  // the parameters may be tuned separately in the future, which is why
   1.396 +  // both predicates are still named here.
   1.397 +  const bool niagara_plus_tuning = is_M_series() || is_niagara_plus();
   1.398 +
   1.399 +  // Only the middle argument differs between the CPU families.
   1.400 +  if (niagara_plus_tuning) {
   1.401 +    return nof_parallel_worker_threads(5, 16, 8);
   1.402 +  }
   1.403 +
   1.404 +  return nof_parallel_worker_threads(5, 8, 8);
   1.405 +}

mercurial