1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,402 @@ 1.4 +/* 1.5 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "precompiled.hpp" 1.29 +#include "asm/macroAssembler.inline.hpp" 1.30 +#include "memory/resourceArea.hpp" 1.31 +#include "runtime/java.hpp" 1.32 +#include "runtime/stubCodeGenerator.hpp" 1.33 +#include "vm_version_sparc.hpp" 1.34 +#ifdef TARGET_OS_FAMILY_linux 1.35 +# include "os_linux.inline.hpp" 1.36 +#endif 1.37 +#ifdef TARGET_OS_FAMILY_solaris 1.38 +# include "os_solaris.inline.hpp" 1.39 +#endif 1.40 + 1.41 +int VM_Version::_features = VM_Version::unknown_m; 1.42 +const char* VM_Version::_features_str = ""; 1.43 + 1.44 +void VM_Version::initialize() { 1.45 + _features = determine_features(); 1.46 + PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); 1.47 + PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); 1.48 + PrefetchFieldsAhead = prefetch_fields_ahead(); 1.49 + 1.50 + assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value"); 1.51 + if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 1.52 + if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0; 1.53 + 1.54 + // Allocation prefetch settings 1.55 + intx cache_line_size = prefetch_data_size(); 1.56 + if( cache_line_size > AllocatePrefetchStepSize ) 1.57 + AllocatePrefetchStepSize = cache_line_size; 1.58 + 1.59 + assert(AllocatePrefetchLines > 0, "invalid value"); 1.60 + if( AllocatePrefetchLines < 1 ) // set valid value in product VM 1.61 + AllocatePrefetchLines = 3; 1.62 + assert(AllocateInstancePrefetchLines > 0, "invalid value"); 1.63 + if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM 1.64 + AllocateInstancePrefetchLines = 1; 1.65 + 1.66 + AllocatePrefetchDistance = allocate_prefetch_distance(); 1.67 + AllocatePrefetchStyle = allocate_prefetch_style(); 1.68 + 1.69 + assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 && 1.70 + (AllocatePrefetchDistance > 0), "invalid value"); 1.71 + if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 || 1.72 + (AllocatePrefetchDistance <= 0)) { 1.73 + AllocatePrefetchDistance = AllocatePrefetchStepSize; 1.74 + } 1.75 + 1.76 + if (AllocatePrefetchStyle == 3 && !has_blk_init()) { 1.77 + warning("BIS instructions are not available on this CPU"); 1.78 + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); 1.79 + } 1.80 + 1.81 + guarantee(VM_Version::has_v9(), "only SPARC v9 is supported"); 1.82 + 1.83 + assert(ArraycopySrcPrefetchDistance < 4096, "invalid value"); 1.84 + if (ArraycopySrcPrefetchDistance >= 4096) 1.85 + ArraycopySrcPrefetchDistance = 4064; 1.86 + assert(ArraycopyDstPrefetchDistance < 4096, "invalid value"); 1.87 + if (ArraycopyDstPrefetchDistance >= 4096) 1.88 + ArraycopyDstPrefetchDistance = 4064; 1.89 + 1.90 + UseSSE = 0; // Only on x86 and x64 1.91 + 1.92 + _supports_cx8 = has_v9(); 1.93 + _supports_atomic_getset4 = true; // swap instruction 1.94 + 1.95 + // There are Fujitsu Sparc64 CPUs which support blk_init as well so 1.96 + // we have to take this check out of the 'is_niagara()' block below. 1.97 + if (has_blk_init()) { 1.98 + // When using CMS or G1, we cannot use memset() in BOT updates 1.99 + // because the sun4v/CMT version in libc_psr uses BIS which 1.100 + // exposes "phantom zeros" to concurrent readers. See 6948537. 1.101 + if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) { 1.102 + FLAG_SET_DEFAULT(UseMemSetInBOT, false); 1.103 + } 1.104 + // Issue a stern warning if the user has explicitly set 1.105 + // UseMemSetInBOT (it is known to cause issues), but allow 1.106 + // use for experimentation and debugging. 1.107 + if (UseConcMarkSweepGC || UseG1GC) { 1.108 + if (UseMemSetInBOT) { 1.109 + assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error"); 1.110 + warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability" 1.111 + " on sun4v; please understand that you are using at your own risk!"); 1.112 + } 1.113 + } 1.114 + } 1.115 + 1.116 + if (is_niagara()) { 1.117 + // Indirect branch is the same cost as direct 1.118 + if (FLAG_IS_DEFAULT(UseInlineCaches)) { 1.119 + FLAG_SET_DEFAULT(UseInlineCaches, false); 1.120 + } 1.121 + // Align loops on a single instruction boundary. 1.122 + if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { 1.123 + FLAG_SET_DEFAULT(OptoLoopAlignment, 4); 1.124 + } 1.125 +#ifdef _LP64 1.126 + // 32-bit oops don't make sense for the 64-bit VM on sparc 1.127 + // since the 32-bit VM has the same registers and smaller objects. 1.128 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 1.129 + Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes); 1.130 +#endif // _LP64 1.131 +#ifdef COMPILER2 1.132 + // Indirect branch is the same cost as direct 1.133 + if (FLAG_IS_DEFAULT(UseJumpTables)) { 1.134 + FLAG_SET_DEFAULT(UseJumpTables, true); 1.135 + } 1.136 + // Single-issue, so entry and loop tops are 1.137 + // aligned on a single instruction boundary 1.138 + if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) { 1.139 + FLAG_SET_DEFAULT(InteriorEntryAlignment, 4); 1.140 + } 1.141 + if (is_niagara_plus()) { 1.142 + if (has_blk_init() && UseTLAB && 1.143 + FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1.144 + // Use BIS instruction for TLAB allocation prefetch. 1.145 + FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1); 1.146 + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1.147 + FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3); 1.148 + } 1.149 + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1.150 + // Use smaller prefetch distance with BIS 1.151 + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); 1.152 + } 1.153 + } 1.154 + if (is_T4()) { 1.155 + // Double number of prefetched cache lines on T4 1.156 + // since L2 cache line size is smaller (32 bytes). 1.157 + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { 1.158 + FLAG_SET_ERGO(intx, AllocatePrefetchLines, AllocatePrefetchLines*2); 1.159 + } 1.160 + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { 1.161 + FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2); 1.162 + } 1.163 + } 1.164 + if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1.165 + // Use different prefetch distance without BIS 1.166 + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); 1.167 + } 1.168 + if (AllocatePrefetchInstr == 1) { 1.169 + // Need a space at the end of TLAB for BIS since it 1.170 + // will fault when accessing memory outside of heap. 1.171 + 1.172 + // +1 for rounding up to next cache line, +1 to be safe 1.173 + int lines = AllocatePrefetchLines + 2; 1.174 + int step_size = AllocatePrefetchStepSize; 1.175 + int distance = AllocatePrefetchDistance; 1.176 + _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize; 1.177 + } 1.178 + } 1.179 +#endif 1.180 + } 1.181 + 1.182 + // Use hardware population count instruction if available. 1.183 + if (has_hardware_popc()) { 1.184 + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1.185 + FLAG_SET_DEFAULT(UsePopCountInstruction, true); 1.186 + } 1.187 + } else if (UsePopCountInstruction) { 1.188 + warning("POPC instruction is not available on this CPU"); 1.189 + FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1.190 + } 1.191 + 1.192 + // T4 and newer Sparc cpus have new compare and branch instruction. 1.193 + if (has_cbcond()) { 1.194 + if (FLAG_IS_DEFAULT(UseCBCond)) { 1.195 + FLAG_SET_DEFAULT(UseCBCond, true); 1.196 + } 1.197 + } else if (UseCBCond) { 1.198 + warning("CBCOND instruction is not available on this CPU"); 1.199 + FLAG_SET_DEFAULT(UseCBCond, false); 1.200 + } 1.201 + 1.202 + assert(BlockZeroingLowLimit > 0, "invalid value"); 1.203 + if (has_block_zeroing()) { 1.204 + if (FLAG_IS_DEFAULT(UseBlockZeroing)) { 1.205 + FLAG_SET_DEFAULT(UseBlockZeroing, true); 1.206 + } 1.207 + } else if (UseBlockZeroing) { 1.208 + warning("BIS zeroing instructions are not available on this CPU"); 1.209 + FLAG_SET_DEFAULT(UseBlockZeroing, false); 1.210 + } 1.211 + 1.212 + assert(BlockCopyLowLimit > 0, "invalid value"); 1.213 + if (has_block_zeroing()) { // has_blk_init() && is_T4(): core's local L2 cache 1.214 + if (FLAG_IS_DEFAULT(UseBlockCopy)) { 1.215 + FLAG_SET_DEFAULT(UseBlockCopy, true); 1.216 + } 1.217 + } else if (UseBlockCopy) { 1.218 + warning("BIS instructions are not available or expensive on this CPU"); 1.219 + FLAG_SET_DEFAULT(UseBlockCopy, false); 1.220 + } 1.221 + 1.222 +#ifdef COMPILER2 1.223 + // T4 and newer Sparc cpus have fast RDPC. 1.224 + if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) { 1.225 + FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true); 1.226 + } 1.227 + 1.228 + // Currently not supported anywhere. 1.229 + FLAG_SET_DEFAULT(UseFPUForSpilling, false); 1.230 + 1.231 + MaxVectorSize = 8; 1.232 + 1.233 + assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); 1.234 +#endif 1.235 + 1.236 + assert((CodeEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); 1.237 + assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); 1.238 + 1.239 + char buf[512]; 1.240 + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.241 + (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), 1.242 + (has_hardware_popc() ? ", popc" : ""), 1.243 + (has_vis1() ? ", vis1" : ""), 1.244 + (has_vis2() ? ", vis2" : ""), 1.245 + (has_vis3() ? ", vis3" : ""), 1.246 + (has_blk_init() ? ", blk_init" : ""), 1.247 + (has_cbcond() ? ", cbcond" : ""), 1.248 + (has_aes() ? ", aes" : ""), 1.249 + (is_ultra3() ? ", ultra3" : ""), 1.250 + (is_sun4v() ? ", sun4v" : ""), 1.251 + (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), 1.252 + (is_sparc64() ? ", sparc64" : ""), 1.253 + (!has_hardware_mul32() ? ", no-mul32" : ""), 1.254 + (!has_hardware_div32() ? ", no-div32" : ""), 1.255 + (!has_hardware_fsmuld() ? ", no-fsmuld" : "")); 1.256 + 1.257 + // buf is started with ", " or is empty 1.258 + _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf); 1.259 + 1.260 + // UseVIS is set to the smallest of what hardware supports and what 1.261 + // the command line requires. I.e., you cannot set UseVIS to 3 on 1.262 + // older UltraSparc which do not support it. 1.263 + if (UseVIS > 3) UseVIS=3; 1.264 + if (UseVIS < 0) UseVIS=0; 1.265 + if (!has_vis3()) // Drop to 2 if no VIS3 support 1.266 + UseVIS = MIN2((intx)2,UseVIS); 1.267 + if (!has_vis2()) // Drop to 1 if no VIS2 support 1.268 + UseVIS = MIN2((intx)1,UseVIS); 1.269 + if (!has_vis1()) // Drop to 0 if no VIS1 support 1.270 + UseVIS = 0; 1.271 + 1.272 + // SPARC T4 and above should have support for AES instructions 1.273 + if (has_aes()) { 1.274 + if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3 1.275 + if (FLAG_IS_DEFAULT(UseAES)) { 1.276 + FLAG_SET_DEFAULT(UseAES, true); 1.277 + } 1.278 + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1.279 + FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1.280 + } 1.281 + // we disable both the AES flags if either of them is disabled on the command line 1.282 + if (!UseAES || !UseAESIntrinsics) { 1.283 + FLAG_SET_DEFAULT(UseAES, false); 1.284 + FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1.285 + } 1.286 + } else { 1.287 + if (UseAES || UseAESIntrinsics) { 1.288 + warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled."); 1.289 + if (UseAES) { 1.290 + FLAG_SET_DEFAULT(UseAES, false); 1.291 + } 1.292 + if (UseAESIntrinsics) { 1.293 + FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1.294 + } 1.295 + } 1.296 + } 1.297 + } else if (UseAES || UseAESIntrinsics) { 1.298 + warning("AES instructions are not available on this CPU"); 1.299 + if (UseAES) { 1.300 + FLAG_SET_DEFAULT(UseAES, false); 1.301 + } 1.302 + if (UseAESIntrinsics) { 1.303 + FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1.304 + } 1.305 + } 1.306 + 1.307 + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1.308 + (cache_line_size > ContendedPaddingWidth)) 1.309 + ContendedPaddingWidth = cache_line_size; 1.310 + 1.311 +#ifndef PRODUCT 1.312 + if (PrintMiscellaneous && Verbose) { 1.313 + tty->print("Allocation"); 1.314 + if (AllocatePrefetchStyle <= 0) { 1.315 + tty->print_cr(": no prefetching"); 1.316 + } else { 1.317 + tty->print(" prefetching: "); 1.318 + if (AllocatePrefetchInstr == 0) { 1.319 + tty->print("PREFETCH"); 1.320 + } else if (AllocatePrefetchInstr == 1) { 1.321 + tty->print("BIS"); 1.322 + } 1.323 + if (AllocatePrefetchLines > 1) { 1.324 + tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); 1.325 + } else { 1.326 + tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); 1.327 + } 1.328 + } 1.329 + if (PrefetchCopyIntervalInBytes > 0) { 1.330 + tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1.331 + } 1.332 + if (PrefetchScanIntervalInBytes > 0) { 1.333 + tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1.334 + } 1.335 + if (PrefetchFieldsAhead > 0) { 1.336 + tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead); 1.337 + } 1.338 + if (ContendedPaddingWidth > 0) { 1.339 + tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1.340 + } 1.341 + } 1.342 +#endif // PRODUCT 1.343 +} 1.344 + 1.345 +void VM_Version::print_features() { 1.346 + tty->print_cr("Version:%s", cpu_features()); 1.347 +} 1.348 + 1.349 +int VM_Version::determine_features() { 1.350 + if (UseV8InstrsOnly) { 1.351 + NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");) 1.352 + return generic_v8_m; 1.353 + } 1.354 + 1.355 + int features = platform_features(unknown_m); // platform_features() is os_arch specific 1.356 + 1.357 + if (features == unknown_m) { 1.358 + features = generic_v9_m; 1.359 + warning("Cannot recognize SPARC version. Default to V9"); 1.360 + } 1.361 + 1.362 + assert(is_T_family(features) == is_niagara(features), "Niagara should be T series"); 1.363 + if (UseNiagaraInstrs) { // Force code generation for Niagara 1.364 + if (is_T_family(features)) { 1.365 + // Happy to accomodate... 1.366 + } else { 1.367 + NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");) 1.368 + features |= T_family_m; 1.369 + } 1.370 + } else { 1.371 + if (is_T_family(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) { 1.372 + NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");) 1.373 + features &= ~(T_family_m | T1_model_m); 1.374 + } else { 1.375 + // Happy to accomodate... 1.376 + } 1.377 + } 1.378 + 1.379 + return features; 1.380 +} 1.381 + 1.382 +static int saved_features = 0; 1.383 + 1.384 +void VM_Version::allow_all() { 1.385 + saved_features = _features; 1.386 + _features = all_features_m; 1.387 +} 1.388 + 1.389 +void VM_Version::revert() { 1.390 + _features = saved_features; 1.391 +} 1.392 + 1.393 +unsigned int VM_Version::calc_parallel_worker_threads() { 1.394 + unsigned int result; 1.395 + if (is_M_series()) { 1.396 + // for now, use same gc thread calculation for M-series as for niagara-plus 1.397 + // in future, we may want to tweak parameters for nof_parallel_worker_thread 1.398 + result = nof_parallel_worker_threads(5, 16, 8); 1.399 + } else if (is_niagara_plus()) { 1.400 + result = nof_parallel_worker_threads(5, 16, 8); 1.401 + } else { 1.402 + result = nof_parallel_worker_threads(5, 8, 8); 1.403 + } 1.404 + return result; 1.405 +}