Wed, 12 Apr 2017 09:03:26 -0700
Merge
1.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Mar 27 08:21:39 2017 -0700 1.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Apr 12 09:03:26 2017 -0700 1.3 @@ -236,7 +236,7 @@ 1.4 assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); 1.5 1.6 char buf[512]; 1.7 - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.8 + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.9 (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), 1.10 (has_hardware_popc() ? ", popc" : ""), 1.11 (has_vis1() ? ", vis1" : ""), 1.12 @@ -249,6 +249,7 @@ 1.13 (has_sha256() ? ", sha256" : ""), 1.14 (has_sha512() ? ", sha512" : ""), 1.15 (is_ultra3() ? ", ultra3" : ""), 1.16 + (has_sparc5_instr() ? ", sparc5" : ""), 1.17 (is_sun4v() ? ", sun4v" : ""), 1.18 (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), 1.19 (is_sparc64() ? ", sparc64" : ""), 1.20 @@ -364,6 +365,7 @@ 1.21 1.22 #ifndef PRODUCT 1.23 if (PrintMiscellaneous && Verbose) { 1.24 + tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1.25 tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); 1.26 tty->print("Allocation"); 1.27 if (AllocatePrefetchStyle <= 0) { 1.28 @@ -447,9 +449,10 @@ 1.29 1.30 unsigned int VM_Version::calc_parallel_worker_threads() { 1.31 unsigned int result; 1.32 - if (is_M_series()) { 1.33 - // for now, use same gc thread calculation for M-series as for niagara-plus 1.34 - // in future, we may want to tweak parameters for nof_parallel_worker_thread 1.35 + if (is_M_series() || is_S_series()) { 1.36 + // for now, use same gc thread calculation for M-series and S-series as for 1.37 + // niagara-plus. 
In future, we may want to tweak parameters for 1.38 + // nof_parallel_worker_thread 1.39 result = nof_parallel_worker_threads(5, 16, 8); 1.40 } else if (is_niagara_plus()) { 1.41 result = nof_parallel_worker_threads(5, 16, 8); 1.42 @@ -458,3 +461,37 @@ 1.43 } 1.44 return result; 1.45 } 1.46 + 1.47 + 1.48 +int VM_Version::parse_features(const char* implementation) { 1.49 + int features = unknown_m; 1.50 + // Convert to UPPER case before compare. 1.51 + char* impl = os::strdup(implementation); 1.52 + 1.53 + for (int i = 0; impl[i] != 0; i++) 1.54 + impl[i] = (char)toupper((uint)impl[i]); 1.55 + 1.56 + if (strstr(impl, "SPARC64") != NULL) { 1.57 + features |= sparc64_family_m; 1.58 + } else if (strstr(impl, "SPARC-M") != NULL) { 1.59 + // M-series SPARC is based on T-series. 1.60 + features |= (M_family_m | T_family_m); 1.61 + } else if (strstr(impl, "SPARC-S") != NULL) { 1.62 + // S-series SPARC is based on T-series. 1.63 + features |= (S_family_m | T_family_m); 1.64 + } else if (strstr(impl, "SPARC-T") != NULL) { 1.65 + features |= T_family_m; 1.66 + if (strstr(impl, "SPARC-T1") != NULL) { 1.67 + features |= T1_model_m; 1.68 + } 1.69 + } else if (strstr(impl, "SUN4V-CPU") != NULL) { 1.70 + // Generic or migration class LDOM 1.71 + features |= T_family_m; 1.72 + } else { 1.73 +#ifndef PRODUCT 1.74 + warning("Failed to parse CPU implementation = '%s'", impl); 1.75 +#endif 1.76 + } 1.77 + os::free((void*)impl); 1.78 + return features; 1.79 +}
2.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Mar 27 08:21:39 2017 -0700 2.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Wed Apr 12 09:03:26 2017 -0700 2.3 @@ -47,13 +47,14 @@ 2.4 cbcond_instructions = 13, 2.5 sparc64_family = 14, 2.6 M_family = 15, 2.7 - T_family = 16, 2.8 - T1_model = 17, 2.9 - sparc5_instructions = 18, 2.10 - aes_instructions = 19, 2.11 - sha1_instruction = 20, 2.12 - sha256_instruction = 21, 2.13 - sha512_instruction = 22 2.14 + S_family = 16, 2.15 + T_family = 17, 2.16 + T1_model = 18, 2.17 + sparc5_instructions = 19, 2.18 + aes_instructions = 20, 2.19 + sha1_instruction = 21, 2.20 + sha256_instruction = 22, 2.21 + sha512_instruction = 23 2.22 }; 2.23 2.24 enum Feature_Flag_Set { 2.25 @@ -76,6 +77,7 @@ 2.26 cbcond_instructions_m = 1 << cbcond_instructions, 2.27 sparc64_family_m = 1 << sparc64_family, 2.28 M_family_m = 1 << M_family, 2.29 + S_family_m = 1 << S_family, 2.30 T_family_m = 1 << T_family, 2.31 T1_model_m = 1 << T1_model, 2.32 sparc5_instructions_m = 1 << sparc5_instructions, 2.33 @@ -105,6 +107,7 @@ 2.34 2.35 // Returns true if the platform is in the niagara line (T series) 2.36 static bool is_M_family(int features) { return (features & M_family_m) != 0; } 2.37 + static bool is_S_family(int features) { return (features & S_family_m) != 0; } 2.38 static bool is_T_family(int features) { return (features & T_family_m) != 0; } 2.39 static bool is_niagara() { return is_T_family(_features); } 2.40 #ifdef ASSERT 2.41 @@ -119,7 +122,7 @@ 2.42 static bool is_T1_model(int features) { return is_T_family(features) && ((features & T1_model_m) != 0); } 2.43 2.44 static int maximum_niagara1_processor_count() { return 32; } 2.45 - 2.46 + static int parse_features(const char* implementation); 2.47 public: 2.48 // Initialization 2.49 static void initialize(); 2.50 @@ -152,6 +155,7 @@ 2.51 static bool is_niagara_plus() { return is_T_family(_features) && !is_T1_model(_features); } 2.52 2.53 static bool is_M_series() { return 
is_M_family(_features); } 2.54 + static bool is_S_series() { return is_S_family(_features); } 2.55 static bool is_T4() { return is_T_family(_features) && has_cbcond(); } 2.56 static bool is_T7() { return is_T_family(_features) && has_sparc5_instr(); } 2.57
3.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Mon Mar 27 08:21:39 2017 -0700 3.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed Apr 12 09:03:26 2017 -0700 3.3 @@ -406,6 +406,8 @@ 3.4 _stepping = 0; 3.5 _cpuFeatures = 0; 3.6 _logical_processors_per_package = 1; 3.7 + // i486 internal cache is both I&D and has a 16-byte line size 3.8 + _L1_data_cache_line_size = 16; 3.9 3.10 if (!Use486InstrsOnly) { 3.11 // Get raw processor info 3.12 @@ -424,6 +426,7 @@ 3.13 // Logical processors are only available on P4s and above, 3.14 // and only if hyperthreading is available. 3.15 _logical_processors_per_package = logical_processor_count(); 3.16 + _L1_data_cache_line_size = L1_line_size(); 3.17 } 3.18 } 3.19 3.20 @@ -1034,6 +1037,7 @@ 3.21 if (PrintMiscellaneous && Verbose) { 3.22 tty->print_cr("Logical CPUs per core: %u", 3.23 logical_processors_per_package()); 3.24 + tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 3.25 tty->print("UseSSE=%d", (int) UseSSE); 3.26 if (UseAVX > 0) { 3.27 tty->print(" UseAVX=%d", (int) UseAVX);
4.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Mon Mar 27 08:21:39 2017 -0700 4.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Wed Apr 12 09:03:26 2017 -0700 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 4.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 4.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.8 * 4.9 * This code is free software; you can redistribute it and/or modify it 4.10 @@ -595,7 +595,7 @@ 4.11 return (result == 0 ? 1 : result); 4.12 } 4.13 4.14 - static intx prefetch_data_size() { 4.15 + static intx L1_line_size() { 4.16 intx result = 0; 4.17 if (is_intel()) { 4.18 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 4.19 @@ -607,6 +607,10 @@ 4.20 return result; 4.21 } 4.22 4.23 + static intx prefetch_data_size() { 4.24 + return L1_line_size(); 4.25 + } 4.26 + 4.27 // 4.28 // Feature identification 4.29 //
5.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Mon Mar 27 08:21:39 2017 -0700 5.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Wed Apr 12 09:03:26 2017 -0700 5.3 @@ -262,6 +262,7 @@ 5.4 5.5 // We need to keep these here as long as we have to build on Solaris 5.6 // versions before 10. 5.7 + 5.8 #ifndef SI_ARCHITECTURE_32 5.9 #define SI_ARCHITECTURE_32 516 /* basic 32-bit SI_ARCHITECTURE */ 5.10 #endif 5.11 @@ -270,231 +271,233 @@ 5.12 #define SI_ARCHITECTURE_64 517 /* basic 64-bit SI_ARCHITECTURE */ 5.13 #endif 5.14 5.15 -static void do_sysinfo(int si, const char* string, int* features, int mask) { 5.16 - char tmp; 5.17 - size_t bufsize = sysinfo(si, &tmp, 1); 5.18 +#ifndef SI_CPUBRAND 5.19 +#define SI_CPUBRAND 523 /* return cpu brand string */ 5.20 +#endif 5.21 5.22 - // All SI defines used below must be supported. 5.23 - guarantee(bufsize != -1, "must be supported"); 5.24 +class Sysinfo { 5.25 + char* _string; 5.26 +public: 5.27 + Sysinfo(int si) : _string(NULL) { 5.28 + char tmp; 5.29 + size_t bufsize = sysinfo(si, &tmp, 1); 5.30 5.31 - char* buf = (char*) malloc(bufsize); 5.32 + if (bufsize != -1) { 5.33 + char* buf = (char*) os::malloc(bufsize, mtInternal); 5.34 + if (buf != NULL) { 5.35 + if (sysinfo(si, buf, bufsize) == bufsize) { 5.36 + _string = buf; 5.37 + } else { 5.38 + os::free(buf); 5.39 + } 5.40 + } 5.41 + } 5.42 + } 5.43 5.44 - if (buf == NULL) 5.45 - return; 5.46 - 5.47 - if (sysinfo(si, buf, bufsize) == bufsize) { 5.48 - // Compare the string. 5.49 - if (strcmp(buf, string) == 0) { 5.50 - *features |= mask; 5.51 + ~Sysinfo() { 5.52 + if (_string != NULL) { 5.53 + os::free(_string); 5.54 } 5.55 } 5.56 5.57 - free(buf); 5.58 -} 5.59 + const char* value() const { 5.60 + return _string; 5.61 + } 5.62 + 5.63 + bool valid() const { 5.64 + return _string != NULL; 5.65 + } 5.66 + 5.67 + bool match(const char* s) const { 5.68 + return valid() ? 
strcmp(_string, s) == 0 : false; 5.69 + } 5.70 + 5.71 + bool match_substring(const char* s) const { 5.72 + return valid() ? strstr(_string, s) != NULL : false; 5.73 + } 5.74 +}; 5.75 + 5.76 +class Sysconf { 5.77 + int _value; 5.78 +public: 5.79 + Sysconf(int sc) : _value(-1) { 5.80 + _value = sysconf(sc); 5.81 + } 5.82 + bool valid() const { 5.83 + return _value != -1; 5.84 + } 5.85 + int value() const { 5.86 + return _value; 5.87 + } 5.88 +}; 5.89 + 5.90 + 5.91 +#ifndef _SC_DCACHE_LINESZ 5.92 +#define _SC_DCACHE_LINESZ 508 /* Data cache line size */ 5.93 +#endif 5.94 + 5.95 +#ifndef _SC_L2CACHE_LINESZ 5.96 +#define _SC_L2CACHE_LINESZ 527 /* Size of L2 cache line */ 5.97 +#endif 5.98 + 5.99 5.100 int VM_Version::platform_features(int features) { 5.101 - // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are 5.102 - // supported on Solaris 10 and later. 5.103 - if (os::Solaris::supports_getisax()) { 5.104 + assert(os::Solaris::supports_getisax(), "getisax() must be available"); 5.105 5.106 - // Check 32-bit architecture. 5.107 - do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m); 5.108 + // Check 32-bit architecture. 5.109 + if (Sysinfo(SI_ARCHITECTURE_32).match("sparc")) { 5.110 + features |= v8_instructions_m; 5.111 + } 5.112 5.113 - // Check 64-bit architecture. 5.114 - do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m); 5.115 + // Check 64-bit architecture. 5.116 + if (Sysinfo(SI_ARCHITECTURE_64).match("sparcv9")) { 5.117 + features |= generic_v9_m; 5.118 + } 5.119 5.120 - // Extract valid instruction set extensions. 5.121 - uint_t avs[2]; 5.122 - uint_t avn = os::Solaris::getisax(avs, 2); 5.123 - assert(avn <= 2, "should return two or less av's"); 5.124 - uint_t av = avs[0]; 5.125 + // Extract valid instruction set extensions. 
5.126 + uint_t avs[2]; 5.127 + uint_t avn = os::Solaris::getisax(avs, 2); 5.128 + assert(avn <= 2, "should return two or less av's"); 5.129 + uint_t av = avs[0]; 5.130 5.131 #ifndef PRODUCT 5.132 - if (PrintMiscellaneous && Verbose) { 5.133 - tty->print("getisax(2) returned: " PTR32_FORMAT, av); 5.134 - if (avn > 1) { 5.135 - tty->print(", " PTR32_FORMAT, avs[1]); 5.136 - } 5.137 - tty->cr(); 5.138 + if (PrintMiscellaneous && Verbose) { 5.139 + tty->print("getisax(2) returned: " PTR32_FORMAT, av); 5.140 + if (avn > 1) { 5.141 + tty->print(", " PTR32_FORMAT, avs[1]); 5.142 } 5.143 + tty->cr(); 5.144 + } 5.145 #endif 5.146 5.147 - if (av & AV_SPARC_MUL32) features |= hardware_mul32_m; 5.148 - if (av & AV_SPARC_DIV32) features |= hardware_div32_m; 5.149 - if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m; 5.150 - if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m; 5.151 - if (av & AV_SPARC_POPC) features |= hardware_popc_m; 5.152 - if (av & AV_SPARC_VIS) features |= vis1_instructions_m; 5.153 - if (av & AV_SPARC_VIS2) features |= vis2_instructions_m; 5.154 - if (avn > 1) { 5.155 - uint_t av2 = avs[1]; 5.156 + if (av & AV_SPARC_MUL32) features |= hardware_mul32_m; 5.157 + if (av & AV_SPARC_DIV32) features |= hardware_div32_m; 5.158 + if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m; 5.159 + if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m; 5.160 + if (av & AV_SPARC_POPC) features |= hardware_popc_m; 5.161 + if (av & AV_SPARC_VIS) features |= vis1_instructions_m; 5.162 + if (av & AV_SPARC_VIS2) features |= vis2_instructions_m; 5.163 + if (avn > 1) { 5.164 + uint_t av2 = avs[1]; 5.165 #ifndef AV2_SPARC_SPARC5 5.166 #define AV2_SPARC_SPARC5 0x00000008 /* The 29 new fp and sub instructions */ 5.167 #endif 5.168 - if (av2 & AV2_SPARC_SPARC5) features |= sparc5_instructions_m; 5.169 - } 5.170 + if (av2 & AV2_SPARC_SPARC5) features |= sparc5_instructions_m; 5.171 + } 5.172 5.173 - // Next values are not defined before Solaris 10 5.174 - // but Solaris 
8 is used for jdk6 update builds. 5.175 + // We only build on Solaris 10 and up, but some of the values below 5.176 + // are not defined on all versions of Solaris 10, so we define them, 5.177 + // if necessary. 5.178 #ifndef AV_SPARC_ASI_BLK_INIT 5.179 #define AV_SPARC_ASI_BLK_INIT 0x0080 /* ASI_BLK_INIT_xxx ASI */ 5.180 #endif 5.181 - if (av & AV_SPARC_ASI_BLK_INIT) features |= blk_init_instructions_m; 5.182 + if (av & AV_SPARC_ASI_BLK_INIT) features |= blk_init_instructions_m; 5.183 5.184 #ifndef AV_SPARC_FMAF 5.185 #define AV_SPARC_FMAF 0x0100 /* Fused Multiply-Add */ 5.186 #endif 5.187 - if (av & AV_SPARC_FMAF) features |= fmaf_instructions_m; 5.188 + if (av & AV_SPARC_FMAF) features |= fmaf_instructions_m; 5.189 5.190 #ifndef AV_SPARC_FMAU 5.191 -#define AV_SPARC_FMAU 0x0200 /* Unfused Multiply-Add */ 5.192 +#define AV_SPARC_FMAU 0x0200 /* Unfused Multiply-Add */ 5.193 #endif 5.194 - if (av & AV_SPARC_FMAU) features |= fmau_instructions_m; 5.195 + if (av & AV_SPARC_FMAU) features |= fmau_instructions_m; 5.196 5.197 #ifndef AV_SPARC_VIS3 5.198 -#define AV_SPARC_VIS3 0x0400 /* VIS3 instruction set extensions */ 5.199 +#define AV_SPARC_VIS3 0x0400 /* VIS3 instruction set extensions */ 5.200 #endif 5.201 - if (av & AV_SPARC_VIS3) features |= vis3_instructions_m; 5.202 + if (av & AV_SPARC_VIS3) features |= vis3_instructions_m; 5.203 5.204 #ifndef AV_SPARC_CBCOND 5.205 #define AV_SPARC_CBCOND 0x10000000 /* compare and branch instrs supported */ 5.206 #endif 5.207 - if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; 5.208 + if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; 5.209 5.210 #ifndef AV_SPARC_AES 5.211 #define AV_SPARC_AES 0x00020000 /* aes instrs supported */ 5.212 #endif 5.213 - if (av & AV_SPARC_AES) features |= aes_instructions_m; 5.214 + if (av & AV_SPARC_AES) features |= aes_instructions_m; 5.215 5.216 #ifndef AV_SPARC_SHA1 5.217 #define AV_SPARC_SHA1 0x00400000 /* sha1 instruction supported */ 5.218 #endif 5.219 - if (av & 
AV_SPARC_SHA1) features |= sha1_instruction_m; 5.220 + if (av & AV_SPARC_SHA1) features |= sha1_instruction_m; 5.221 5.222 #ifndef AV_SPARC_SHA256 5.223 #define AV_SPARC_SHA256 0x00800000 /* sha256 instruction supported */ 5.224 #endif 5.225 - if (av & AV_SPARC_SHA256) features |= sha256_instruction_m; 5.226 + if (av & AV_SPARC_SHA256) features |= sha256_instruction_m; 5.227 5.228 #ifndef AV_SPARC_SHA512 5.229 #define AV_SPARC_SHA512 0x01000000 /* sha512 instruction supported */ 5.230 #endif 5.231 - if (av & AV_SPARC_SHA512) features |= sha512_instruction_m; 5.232 + if (av & AV_SPARC_SHA512) features |= sha512_instruction_m; 5.233 5.234 + // Determine the machine type. 5.235 + if (Sysinfo(SI_MACHINE).match("sun4v")) { 5.236 + features |= sun4v_m; 5.237 + } 5.238 + 5.239 + // If SI_CPUBRAND works, that means Solaris 12 API to get the cache line sizes 5.240 + // is available to us as well 5.241 + Sysinfo cpu_info(SI_CPUBRAND); 5.242 + bool use_solaris_12_api = cpu_info.valid(); 5.243 + const char* impl; 5.244 + int impl_m = 0; 5.245 + if (use_solaris_12_api) { 5.246 + impl = cpu_info.value(); 5.247 +#ifndef PRODUCT 5.248 + if (PrintMiscellaneous && Verbose) { 5.249 + tty->print_cr("Parsing CPU implementation from %s", impl); 5.250 + } 5.251 +#endif 5.252 + impl_m = parse_features(impl); 5.253 } else { 5.254 - // getisax(2) failed, use the old legacy code. 5.255 + // Otherwise use kstat to determine the machine type. 
5.256 + kstat_ctl_t* kc = kstat_open(); 5.257 + if (kc != NULL) { 5.258 + kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, NULL); 5.259 + if (ksp != NULL) { 5.260 + if (kstat_read(kc, ksp, NULL) != -1 && ksp->ks_data != NULL) { 5.261 + kstat_named_t* knm = (kstat_named_t *)ksp->ks_data; 5.262 + for (int i = 0; i < ksp->ks_ndata; i++) { 5.263 + if (strcmp((const char*)&(knm[i].name), "implementation") == 0) { 5.264 + impl = KSTAT_NAMED_STR_PTR(&knm[i]); 5.265 #ifndef PRODUCT 5.266 - if (PrintMiscellaneous && Verbose) 5.267 - tty->print_cr("getisax(2) is not supported."); 5.268 + if (PrintMiscellaneous && Verbose) { 5.269 + tty->print_cr("Parsing CPU implementation from %s", impl); 5.270 + } 5.271 #endif 5.272 - 5.273 - char tmp; 5.274 - size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1); 5.275 - char* buf = (char*) malloc(bufsize); 5.276 - 5.277 - if (buf != NULL) { 5.278 - if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) { 5.279 - // Figure out what kind of sparc we have 5.280 - char *sparc_string = strstr(buf, "sparc"); 5.281 - if (sparc_string != NULL) { features |= v8_instructions_m; 5.282 - if (sparc_string[5] == 'v') { 5.283 - if (sparc_string[6] == '8') { 5.284 - if (sparc_string[7] == '-') { features |= hardware_mul32_m; 5.285 - features |= hardware_div32_m; 5.286 - } else if (sparc_string[7] == 'p') features |= generic_v9_m; 5.287 - else features |= generic_v8_m; 5.288 - } else if (sparc_string[6] == '9') features |= generic_v9_m; 5.289 + impl_m = parse_features(impl); 5.290 + break; 5.291 + } 5.292 } 5.293 } 5.294 - 5.295 - // Check for visualization instructions 5.296 - char *vis = strstr(buf, "vis"); 5.297 - if (vis != NULL) { features |= vis1_instructions_m; 5.298 - if (vis[3] == '2') features |= vis2_instructions_m; 5.299 - } 5.300 } 5.301 - free(buf); 5.302 + kstat_close(kc); 5.303 } 5.304 } 5.305 + assert(impl_m != 0, err_msg("Unknown CPU implementation %s", impl)); 5.306 + features |= impl_m; 5.307 5.308 - // Determine the machine type. 
5.309 - do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m); 5.310 + bool is_sun4v = (features & sun4v_m) != 0; 5.311 + if (use_solaris_12_api && is_sun4v) { 5.312 + // If Solaris 12 API is supported and it's sun4v use sysconf() to get the cache line sizes 5.313 + Sysconf l1_dcache_line_size(_SC_DCACHE_LINESZ); 5.314 + if (l1_dcache_line_size.valid()) { 5.315 + _L1_data_cache_line_size = l1_dcache_line_size.value(); 5.316 + } 5.317 5.318 - { 5.319 - // Using kstat to determine the machine type. 5.320 - kstat_ctl_t* kc = kstat_open(); 5.321 - kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, NULL); 5.322 - const char* implementation = "UNKNOWN"; 5.323 - if (ksp != NULL) { 5.324 - if (kstat_read(kc, ksp, NULL) != -1 && ksp->ks_data != NULL) { 5.325 - kstat_named_t* knm = (kstat_named_t *)ksp->ks_data; 5.326 - for (int i = 0; i < ksp->ks_ndata; i++) { 5.327 - if (strcmp((const char*)&(knm[i].name),"implementation") == 0) { 5.328 -#ifndef KSTAT_DATA_STRING 5.329 -#define KSTAT_DATA_STRING 9 5.330 -#endif 5.331 - if (knm[i].data_type == KSTAT_DATA_CHAR) { 5.332 - // VM is running on Solaris 8 which does not have value.str. 5.333 - implementation = &(knm[i].value.c[0]); 5.334 - } else if (knm[i].data_type == KSTAT_DATA_STRING) { 5.335 - // VM is running on Solaris 10. 5.336 -#ifndef KSTAT_NAMED_STR_PTR 5.337 - // Solaris 8 was used to build VM, define the structure it misses. 5.338 - struct str_t { 5.339 - union { 5.340 - char *ptr; /* NULL-term string */ 5.341 - char __pad[8]; /* 64-bit padding */ 5.342 - } addr; 5.343 - uint32_t len; /* # bytes for strlen + '\0' */ 5.344 - }; 5.345 -#define KSTAT_NAMED_STR_PTR(knptr) (( (str_t*)&((knptr)->value) )->addr.ptr) 5.346 -#endif 5.347 - implementation = KSTAT_NAMED_STR_PTR(&knm[i]); 5.348 - } 5.349 -#ifndef PRODUCT 5.350 - if (PrintMiscellaneous && Verbose) { 5.351 - tty->print_cr("cpu_info.implementation: %s", implementation); 5.352 - } 5.353 -#endif 5.354 - // Convert to UPPER case before compare. 
5.355 - char* impl = strdup(implementation); 5.356 - 5.357 - for (int i = 0; impl[i] != 0; i++) 5.358 - impl[i] = (char)toupper((uint)impl[i]); 5.359 - if (strstr(impl, "SPARC64") != NULL) { 5.360 - features |= sparc64_family_m; 5.361 - } else if (strstr(impl, "SPARC-M") != NULL) { 5.362 - // M-series SPARC is based on T-series. 5.363 - features |= (M_family_m | T_family_m); 5.364 - } else if (strstr(impl, "SPARC-T") != NULL) { 5.365 - features |= T_family_m; 5.366 - if (strstr(impl, "SPARC-T1") != NULL) { 5.367 - features |= T1_model_m; 5.368 - } 5.369 - } else { 5.370 - if (strstr(impl, "SPARC") == NULL) { 5.371 -#ifndef PRODUCT 5.372 - // kstat on Solaris 8 virtual machines (branded zones) 5.373 - // returns "(unsupported)" implementation. 5.374 - warning("kstat cpu_info implementation = '%s', should contain SPARC", impl); 5.375 -#endif 5.376 - implementation = "SPARC"; 5.377 - } 5.378 - } 5.379 - free((void*)impl); 5.380 - break; 5.381 - } 5.382 - } // for( 5.383 - } 5.384 + Sysconf l2_dcache_line_size(_SC_L2CACHE_LINESZ); 5.385 + if (l2_dcache_line_size.valid()) { 5.386 + _L2_data_cache_line_size = l2_dcache_line_size.value(); 5.387 } 5.388 - assert(strcmp(implementation, "UNKNOWN") != 0, 5.389 - "unknown cpu info (changed kstat interface?)"); 5.390 - kstat_close(kc); 5.391 + } else { 5.392 + // Otherwise figure out the cache line sizes using PICL 5.393 + bool is_fujitsu = (features & sparc64_family_m) != 0; 5.394 + PICL picl(is_fujitsu, is_sun4v); 5.395 + _L1_data_cache_line_size = picl.L1_data_cache_line_size(); 5.396 + _L2_data_cache_line_size = picl.L2_data_cache_line_size(); 5.397 } 5.398 - 5.399 - // Figure out cache line sizes using PICL 5.400 - PICL picl((features & sparc64_family_m) != 0, (features & sun4v_m) != 0); 5.401 - _L2_data_cache_line_size = picl.L2_data_cache_line_size(); 5.402 - 5.403 return features; 5.404 }
6.1 --- a/src/share/vm/c1/c1_LIR.cpp Mon Mar 27 08:21:39 2017 -0700 6.2 +++ b/src/share/vm/c1/c1_LIR.cpp Wed Apr 12 09:03:26 2017 -0700 6.3 @@ -1516,6 +1516,17 @@ 6.4 append(c); 6.5 } 6.6 6.7 +void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { 6.8 + if (deoptimize_on_null) { 6.9 + // Emit an explicit null check and deoptimize if opr is null 6.10 + CodeStub* deopt = new DeoptimizeStub(info); 6.11 + cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); 6.12 + branch(lir_cond_equal, T_OBJECT, deopt); 6.13 + } else { 6.14 + // Emit an implicit null check 6.15 + append(new LIR_Op1(lir_null_check, opr, info)); 6.16 + } 6.17 +} 6.18 6.19 void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, 6.20 LIR_Opr t1, LIR_Opr t2, LIR_Opr result) {
7.1 --- a/src/share/vm/c1/c1_LIR.hpp Mon Mar 27 08:21:39 2017 -0700 7.2 +++ b/src/share/vm/c1/c1_LIR.hpp Wed Apr 12 09:03:26 2017 -0700 7.3 @@ -2153,7 +2153,7 @@ 7.4 void pack64(LIR_Opr src, LIR_Opr dst) { append(new LIR_Op1(lir_pack64, src, dst, T_LONG, lir_patch_none, NULL)); } 7.5 void unpack64(LIR_Opr src, LIR_Opr dst) { append(new LIR_Op1(lir_unpack64, src, dst, T_LONG, lir_patch_none, NULL)); } 7.6 7.7 - void null_check(LIR_Opr opr, CodeEmitInfo* info) { append(new LIR_Op1(lir_null_check, opr, info)); } 7.8 + void null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null = false); 7.9 void throw_exception(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { 7.10 append(new LIR_Op2(lir_throw, exceptionPC, exceptionOop, LIR_OprFact::illegalOpr, info)); 7.11 }
8.1 --- a/src/share/vm/c1/c1_LIRGenerator.cpp Mon Mar 27 08:21:39 2017 -0700 8.2 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Wed Apr 12 09:03:26 2017 -0700 8.3 @@ -1700,8 +1700,10 @@ 8.4 if (x->needs_null_check() && 8.5 (needs_patching || 8.6 MacroAssembler::needs_explicit_null_check(x->offset()))) { 8.7 - // emit an explicit null check because the offset is too large 8.8 - __ null_check(object.result(), new CodeEmitInfo(info)); 8.9 + // Emit an explicit null check because the offset is too large. 8.10 + // If the class is not loaded and the object is NULL, we need to deoptimize to throw a 8.11 + // NoClassDefFoundError in the interpreter instead of an implicit NPE from compiled code. 8.12 + __ null_check(object.result(), new CodeEmitInfo(info), /* deoptimize */ needs_patching); 8.13 } 8.14 8.15 LIR_Address* address; 8.16 @@ -1785,8 +1787,10 @@ 8.17 obj = new_register(T_OBJECT); 8.18 __ move(LIR_OprFact::oopConst(NULL), obj); 8.19 } 8.20 - // emit an explicit null check because the offset is too large 8.21 - __ null_check(obj, new CodeEmitInfo(info)); 8.22 + // Emit an explicit null check because the offset is too large. 8.23 + // If the class is not loaded and the object is NULL, we need to deoptimize to throw a 8.24 + // NoClassDefFoundError in the interpreter instead of an implicit NPE from compiled code. 8.25 + __ null_check(obj, new CodeEmitInfo(info), /* deoptimize */ needs_patching); 8.26 } 8.27 8.28 LIR_Opr reg = rlock_result(x, field_type);
9.1 --- a/src/share/vm/code/nmethod.cpp Mon Mar 27 08:21:39 2017 -0700 9.2 +++ b/src/share/vm/code/nmethod.cpp Wed Apr 12 09:03:26 2017 -0700 9.3 @@ -1,5 +1,5 @@ 9.4 /* 9.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 9.6 + * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 9.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 9.8 * 9.9 * This code is free software; you can redistribute it and/or modify it 9.10 @@ -254,7 +254,8 @@ 9.11 9.12 9.13 address ExceptionCache::test_address(address addr) { 9.14 - for (int i=0; i<count(); i++) { 9.15 + int limit = count(); 9.16 + for (int i = 0; i < limit; i++) { 9.17 if (pc_at(i) == addr) { 9.18 return handler_at(i); 9.19 } 9.20 @@ -265,9 +266,11 @@ 9.21 9.22 bool ExceptionCache::add_address_and_handler(address addr, address handler) { 9.23 if (test_address(addr) == handler) return true; 9.24 - if (count() < cache_size) { 9.25 - set_pc_at(count(),addr); 9.26 - set_handler_at(count(), handler); 9.27 + 9.28 + int index = count(); 9.29 + if (index < cache_size) { 9.30 + set_pc_at(index, addr); 9.31 + set_handler_at(index, handler); 9.32 increment_count(); 9.33 return true; 9.34 } 9.35 @@ -380,10 +383,11 @@ 9.36 assert(new_entry != NULL,"Must be non null"); 9.37 assert(new_entry->next() == NULL, "Must be null"); 9.38 9.39 - if (exception_cache() != NULL) { 9.40 - new_entry->set_next(exception_cache()); 9.41 + ExceptionCache *ec = exception_cache(); 9.42 + if (ec != NULL) { 9.43 + new_entry->set_next(ec); 9.44 } 9.45 - set_exception_cache(new_entry); 9.46 + release_set_exception_cache(new_entry); 9.47 } 9.48 9.49 void nmethod::clean_exception_cache(BoolObjectClosure* is_alive) {
10.1 --- a/src/share/vm/code/nmethod.hpp Mon Mar 27 08:21:39 2017 -0700 10.2 +++ b/src/share/vm/code/nmethod.hpp Wed Apr 12 09:03:26 2017 -0700 10.3 @@ -1,5 +1,5 @@ 10.4 /* 10.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 10.6 + * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 10.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 10.8 * 10.9 * This code is free software; you can redistribute it and/or modify it 10.10 @@ -39,15 +39,16 @@ 10.11 Klass* _exception_type; 10.12 address _pc[cache_size]; 10.13 address _handler[cache_size]; 10.14 - int _count; 10.15 + volatile int _count; 10.16 ExceptionCache* _next; 10.17 10.18 address pc_at(int index) { assert(index >= 0 && index < count(),""); return _pc[index]; } 10.19 void set_pc_at(int index, address a) { assert(index >= 0 && index < cache_size,""); _pc[index] = a; } 10.20 address handler_at(int index) { assert(index >= 0 && index < count(),""); return _handler[index]; } 10.21 void set_handler_at(int index, address a) { assert(index >= 0 && index < cache_size,""); _handler[index] = a; } 10.22 - int count() { return _count; } 10.23 - void increment_count() { _count++; } 10.24 + int count() { return OrderAccess::load_acquire(&_count); } 10.25 + // increment_count is only called under lock, but there may be concurrent readers. 10.26 + void increment_count() { OrderAccess::release_store(&_count, _count + 1); } 10.27 10.28 public: 10.29 10.30 @@ -237,7 +238,7 @@ 10.31 // counter is decreased (by 1) while sweeping. 
10.32 int _hotness_counter; 10.33 10.34 - ExceptionCache *_exception_cache; 10.35 + ExceptionCache * volatile _exception_cache; 10.36 PcDescCache _pc_desc_cache; 10.37 10.38 // These are used for compiled synchronized native methods to 10.39 @@ -433,7 +434,7 @@ 10.40 10.41 // flag accessing and manipulation 10.42 bool is_in_use() const { return _state == in_use; } 10.43 - bool is_alive() const { return _state == in_use || _state == not_entrant; } 10.44 + bool is_alive() const { unsigned char s = _state; return s == in_use || s == not_entrant; } 10.45 bool is_not_entrant() const { return _state == not_entrant; } 10.46 bool is_zombie() const { return _state == zombie; } 10.47 bool is_unloaded() const { return _state == unloaded; } 10.48 @@ -555,8 +556,10 @@ 10.49 void set_stack_traversal_mark(long l) { _stack_traversal_mark = l; } 10.50 10.51 // Exception cache support 10.52 + // Note: _exception_cache may be read concurrently. We rely on memory_order_consume here. 10.53 ExceptionCache* exception_cache() const { return _exception_cache; } 10.54 void set_exception_cache(ExceptionCache *ec) { _exception_cache = ec; } 10.55 + void release_set_exception_cache(ExceptionCache *ec) { OrderAccess::release_store_ptr(&_exception_cache, ec); } 10.56 address handler_for_exception_and_pc(Handle exception, address pc); 10.57 void add_handler_for_exception_and_pc(Handle exception, address pc, address handler); 10.58 void clean_exception_cache(BoolObjectClosure* is_alive);
11.1 --- a/src/share/vm/opto/connode.cpp Mon Mar 27 08:21:39 2017 -0700 11.2 +++ b/src/share/vm/opto/connode.cpp Wed Apr 12 09:03:26 2017 -0700 11.3 @@ -999,8 +999,7 @@ 11.4 } 11.5 11.6 #ifdef _LP64 11.7 - // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) or 11.8 - // ConvI2L(CastII(AddI(x, y))) to AddL(ConvI2L(CastII(x)), ConvI2L(CastII(y))), 11.9 + // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) 11.10 // but only if x and y have subranges that cannot cause 32-bit overflow, 11.11 // under the assumption that x+y is in my own subrange this->type(). 11.12 11.13 @@ -1024,13 +1023,6 @@ 11.14 11.15 Node* z = in(1); 11.16 int op = z->Opcode(); 11.17 - Node* ctrl = NULL; 11.18 - if (op == Op_CastII && z->as_CastII()->has_range_check()) { 11.19 - // Skip CastII node but save control dependency 11.20 - ctrl = z->in(0); 11.21 - z = z->in(1); 11.22 - op = z->Opcode(); 11.23 - } 11.24 if (op == Op_AddI || op == Op_SubI) { 11.25 Node* x = z->in(1); 11.26 Node* y = z->in(2); 11.27 @@ -1090,8 +1082,8 @@ 11.28 } 11.29 assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow"); 11.30 assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow"); 11.31 - Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), ctrl); 11.32 - Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), ctrl); 11.33 + Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), NULL); 11.34 + Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), NULL); 11.35 switch (op) { 11.36 case Op_AddI: return new (phase->C) AddLNode(cx, cy); 11.37 case Op_SubI: return new (phase->C) SubLNode(cx, cy);
12.1 --- a/src/share/vm/prims/jni.cpp Mon Mar 27 08:21:39 2017 -0700 12.2 +++ b/src/share/vm/prims/jni.cpp Wed Apr 12 09:03:26 2017 -0700 12.3 @@ -5129,6 +5129,7 @@ 12.4 run_unit_test(TestKlass_test()); 12.5 run_unit_test(Test_linked_list()); 12.6 run_unit_test(TestChunkedList_test()); 12.7 + run_unit_test(ObjectMonitor::sanity_checks()); 12.8 #if INCLUDE_VM_STRUCTS 12.9 run_unit_test(VMStructs::test()); 12.10 #endif
13.1 --- a/src/share/vm/runtime/objectMonitor.cpp Mon Mar 27 08:21:39 2017 -0700 13.2 +++ b/src/share/vm/runtime/objectMonitor.cpp Wed Apr 12 09:03:26 2017 -0700 13.3 @@ -2529,6 +2529,10 @@ 13.4 SETKNOB(FastHSSEC) ; 13.5 #undef SETKNOB 13.6 13.7 + if (Knob_Verbose) { 13.8 + sanity_checks(); 13.9 + } 13.10 + 13.11 if (os::is_MP()) { 13.12 BackOffMask = (1 << Knob_SpinBackOff) - 1 ; 13.13 if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ; 13.14 @@ -2549,6 +2553,66 @@ 13.15 InitDone = 1 ; 13.16 } 13.17 13.18 +void ObjectMonitor::sanity_checks() { 13.19 + int error_cnt = 0; 13.20 + int warning_cnt = 0; 13.21 + bool verbose = Knob_Verbose != 0 NOT_PRODUCT(|| VerboseInternalVMTests); 13.22 + 13.23 + if (verbose) { 13.24 + tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT, 13.25 + sizeof(ObjectMonitor)); 13.26 + } 13.27 + 13.28 + uint cache_line_size = VM_Version::L1_data_cache_line_size(); 13.29 + if (verbose) { 13.30 + tty->print_cr("INFO: L1_data_cache_line_size=%u", cache_line_size); 13.31 + } 13.32 + 13.33 + ObjectMonitor dummy; 13.34 + u_char *addr_begin = (u_char*)&dummy; 13.35 + u_char *addr_header = (u_char*)&dummy._header; 13.36 + u_char *addr_owner = (u_char*)&dummy._owner; 13.37 + 13.38 + uint offset_header = (uint)(addr_header - addr_begin); 13.39 + if (verbose) tty->print_cr("INFO: offset(_header)=%u", offset_header); 13.40 + 13.41 + uint offset_owner = (uint)(addr_owner - addr_begin); 13.42 + if (verbose) tty->print_cr("INFO: offset(_owner)=%u", offset_owner); 13.43 + 13.44 + if ((uint)(addr_header - addr_begin) != 0) { 13.45 + tty->print_cr("ERROR: offset(_header) must be zero (0)."); 13.46 + error_cnt++; 13.47 + } 13.48 + 13.49 + if (cache_line_size != 0) { 13.50 + // We were able to determine the L1 data cache line size so 13.51 + // do some cache line specific sanity checks 13.52 + 13.53 + if ((offset_owner - offset_header) < cache_line_size) { 13.54 + tty->print_cr("WARNING: the _header and _owner fields are closer " 13.55 +
"than a cache line which permits false sharing."); 13.56 + warning_cnt++; 13.57 + } 13.58 + 13.59 + if ((sizeof(ObjectMonitor) % cache_line_size) != 0) { 13.60 + tty->print_cr("WARNING: ObjectMonitor size is not a multiple of " 13.61 + "a cache line which permits false sharing."); 13.62 + warning_cnt++; 13.63 + } 13.64 + } 13.65 + 13.66 + ObjectSynchronizer::sanity_checks(verbose, cache_line_size, &error_cnt, 13.67 + &warning_cnt); 13.68 + 13.69 + if (verbose || error_cnt != 0 || warning_cnt != 0) { 13.70 + tty->print_cr("INFO: error_cnt=%d", error_cnt); 13.71 + tty->print_cr("INFO: warning_cnt=%d", warning_cnt); 13.72 + } 13.73 + 13.74 + guarantee(error_cnt == 0, 13.75 + "Fatal error(s) found in ObjectMonitor::sanity_checks()"); 13.76 +} 13.77 + 13.78 #ifndef PRODUCT 13.79 void ObjectMonitor::verify() { 13.80 }
14.1 --- a/src/share/vm/runtime/objectMonitor.hpp Mon Mar 27 08:21:39 2017 -0700 14.2 +++ b/src/share/vm/runtime/objectMonitor.hpp Wed Apr 12 09:03:26 2017 -0700 14.3 @@ -189,6 +189,8 @@ 14.4 bool check(TRAPS); // true if the thread owns the monitor. 14.5 void check_slow(TRAPS); 14.6 void clear(); 14.7 + static void sanity_checks(); // public for -XX:+ExecuteInternalVMTests 14.8 + // in PRODUCT for -XX:SyncKnobs=Verbose=1 14.9 #ifndef PRODUCT 14.10 void verify(); 14.11 void print(); 14.12 @@ -234,8 +236,6 @@ 14.13 14.14 // WARNING: this must be the very first word of ObjectMonitor 14.15 // This means this class can't use any virtual member functions. 14.16 - // TODO-FIXME: assert that offsetof(_header) is 0 or get rid of the 14.17 - // implicit 0 offset in emitted code. 14.18 14.19 volatile markOop _header; // displaced object header word - mark 14.20 void* volatile _object; // backward object pointer - strong root
15.1 --- a/src/share/vm/runtime/synchronizer.cpp Mon Mar 27 08:21:39 2017 -0700 15.2 +++ b/src/share/vm/runtime/synchronizer.cpp Wed Apr 12 09:03:26 2017 -0700 15.3 @@ -437,19 +437,22 @@ 15.4 // Hash Code handling 15.5 // 15.6 // Performance concern: 15.7 -// OrderAccess::storestore() calls release() which STs 0 into the global volatile 15.8 -// OrderAccess::Dummy variable. This store is unnecessary for correctness. 15.9 -// Many threads STing into a common location causes considerable cache migration 15.10 -// or "sloshing" on large SMP system. As such, I avoid using OrderAccess::storestore() 15.11 -// until it's repaired. In some cases OrderAccess::fence() -- which incurs local 15.12 -// latency on the executing processor -- is a better choice as it scales on SMP 15.13 -// systems. See http://blogs.sun.com/dave/entry/biased_locking_in_hotspot for a 15.14 -// discussion of coherency costs. Note that all our current reference platforms 15.15 -// provide strong ST-ST order, so the issue is moot on IA32, x64, and SPARC. 15.16 +// OrderAccess::storestore() calls release() which at one time stored 0 15.17 +// into the global volatile OrderAccess::dummy variable. This store was 15.18 +// unnecessary for correctness. Many threads storing into a common location 15.19 +// causes considerable cache migration or "sloshing" on large SMP systems. 15.20 +// As such, I avoided using OrderAccess::storestore(). In some cases 15.21 +// OrderAccess::fence() -- which incurs local latency on the executing 15.22 +// processor -- is a better choice as it scales on SMP systems. 15.23 +// 15.24 +// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for 15.25 +// a discussion of coherency costs. Note that all our current reference 15.26 +// platforms provide strong ST-ST order, so the issue is moot on IA32, 15.27 +// x64, and SPARC.
15.28 // 15.29 // As a general policy we use "volatile" to control compiler-based reordering 15.30 -// and explicit fences (barriers) to control for architectural reordering performed 15.31 -// by the CPU(s) or platform. 15.32 +// and explicit fences (barriers) to control for architectural reordering 15.33 +// performed by the CPU(s) or platform. 15.34 15.35 struct SharedGlobals { 15.36 // These are highly shared mostly-read variables. 15.37 @@ -1636,7 +1639,55 @@ 15.38 } 15.39 15.40 //------------------------------------------------------------------------------ 15.41 -// Non-product code 15.42 +// Debugging code 15.43 + 15.44 +void ObjectSynchronizer::sanity_checks(const bool verbose, 15.45 + const uint cache_line_size, 15.46 + int *error_cnt_ptr, 15.47 + int *warning_cnt_ptr) { 15.48 + u_char *addr_begin = (u_char*)&GVars; 15.49 + u_char *addr_stwRandom = (u_char*)&GVars.stwRandom; 15.50 + u_char *addr_hcSequence = (u_char*)&GVars.hcSequence; 15.51 + 15.52 + if (verbose) { 15.53 + tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT, 15.54 + sizeof(SharedGlobals)); 15.55 + } 15.56 + 15.57 + uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin); 15.58 + if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom); 15.59 + 15.60 + uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin); 15.61 + if (verbose) { 15.62 + tty->print_cr("INFO: offset(_hcSequence)=%u", offset_hcSequence); 15.63 + } 15.64 + 15.65 + if (cache_line_size != 0) { 15.66 + // We were able to determine the L1 data cache line size so 15.67 + // do some cache line specific sanity checks 15.68 + 15.69 + if (offset_stwRandom < cache_line_size) { 15.70 + tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer " 15.71 + "to the struct beginning than a cache line which permits " 15.72 + "false sharing."); 15.73 + (*warning_cnt_ptr)++; 15.74 + } 15.75 + 15.76 + if ((offset_hcSequence - offset_stwRandom) < cache_line_size) { 15.77 + tty->print_cr("WARNING: the
SharedGlobals.stwRandom and " 15.78 + "SharedGlobals.hcSequence fields are closer than a cache " 15.79 + "line which permits false sharing."); 15.80 + (*warning_cnt_ptr)++; 15.81 + } 15.82 + 15.83 + if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) { 15.84 + tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer " 15.85 + "to the struct end than a cache line which permits false " 15.86 + "sharing."); 15.87 + (*warning_cnt_ptr)++; 15.88 + } 15.89 + } 15.90 +} 15.91 15.92 #ifndef PRODUCT 15.93
16.1 --- a/src/share/vm/runtime/synchronizer.hpp Mon Mar 27 08:21:39 2017 -0700 16.2 +++ b/src/share/vm/runtime/synchronizer.hpp Wed Apr 12 09:03:26 2017 -0700 16.3 @@ -121,6 +121,9 @@ 16.4 static void oops_do(OopClosure* f); 16.5 16.6 // debugging 16.7 + static void sanity_checks(const bool verbose, 16.8 + const unsigned int cache_line_size, 16.9 + int *error_cnt_ptr, int *warning_cnt_ptr); 16.10 static void verify() PRODUCT_RETURN; 16.11 static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; 16.12
17.1 --- a/src/share/vm/runtime/vmStructs.cpp Mon Mar 27 08:21:39 2017 -0700 17.2 +++ b/src/share/vm/runtime/vmStructs.cpp Wed Apr 12 09:03:26 2017 -0700 17.3 @@ -1,5 +1,5 @@ 17.4 /* 17.5 - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 17.6 + * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. 17.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 17.8 * 17.9 * This code is free software; you can redistribute it and/or modify it 17.10 @@ -879,7 +879,7 @@ 17.11 nonstatic_field(nmethod, _stack_traversal_mark, long) \ 17.12 nonstatic_field(nmethod, _compile_id, int) \ 17.13 nonstatic_field(nmethod, _comp_level, int) \ 17.14 - nonstatic_field(nmethod, _exception_cache, ExceptionCache*) \ 17.15 + volatile_nonstatic_field(nmethod, _exception_cache, ExceptionCache*) \ 17.16 nonstatic_field(nmethod, _marked_for_deoptimization, bool) \ 17.17 \ 17.18 unchecked_c2_static_field(Deoptimization, _trap_reason_name, void*) \
18.1 --- a/src/share/vm/runtime/vm_version.cpp Mon Mar 27 08:21:39 2017 -0700 18.2 +++ b/src/share/vm/runtime/vm_version.cpp Wed Apr 12 09:03:26 2017 -0700 18.3 @@ -50,6 +50,7 @@ 18.4 bool Abstract_VM_Version::_supports_atomic_getadd4 = false; 18.5 bool Abstract_VM_Version::_supports_atomic_getadd8 = false; 18.6 unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U; 18.7 +unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0; 18.8 int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0; 18.9 18.10 #ifndef HOTSPOT_RELEASE_VERSION
19.1 --- a/src/share/vm/runtime/vm_version.hpp Mon Mar 27 08:21:39 2017 -0700 19.2 +++ b/src/share/vm/runtime/vm_version.hpp Wed Apr 12 09:03:26 2017 -0700 19.3 @@ -1,5 +1,5 @@ 19.4 /* 19.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 19.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 19.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 19.8 * 19.9 * This code is free software; you can redistribute it and/or modify it 19.10 @@ -42,6 +42,7 @@ 19.11 static bool _supports_atomic_getadd4; 19.12 static bool _supports_atomic_getadd8; 19.13 static unsigned int _logical_processors_per_package; 19.14 + static unsigned int _L1_data_cache_line_size; 19.15 static int _vm_major_version; 19.16 static int _vm_minor_version; 19.17 static int _vm_build_number; 19.18 @@ -114,6 +115,10 @@ 19.19 return _logical_processors_per_package; 19.20 } 19.21 19.22 + static unsigned int L1_data_cache_line_size() { 19.23 + return _L1_data_cache_line_size; 19.24 + } 19.25 + 19.26 // Need a space at the end of TLAB for prefetch instructions 19.27 // which may fault when accessing memory outside of heap. 19.28 static int reserve_for_allocation_prefetch() {
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/test/compiler/c1/TestUnresolvedField.jasm Wed Apr 12 09:03:26 2017 -0700 20.3 @@ -0,0 +1,38 @@ 20.4 +/* 20.5 + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 20.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 20.7 + * 20.8 + * This code is free software; you can redistribute it and/or modify it 20.9 + * under the terms of the GNU General Public License version 2 only, as 20.10 + * published by the Free Software Foundation. 20.11 + * 20.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 20.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 20.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 20.15 + * version 2 for more details (a copy is included in the LICENSE file that 20.16 + * accompanied this code). 20.17 + * 20.18 + * You should have received a copy of the GNU General Public License version 20.19 + * 2 along with this work; if not, write to the Free Software Foundation, 20.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20.21 + * 20.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20.23 + * or visit www.oracle.com if you need additional information or have any 20.24 + * questions. 20.25 + * 20.26 + */ 20.27 + 20.28 +public class compiler/c1/TestUnresolvedField version 52:0 { 20.29 + public static Method testGetField:"()V" stack 1 locals 1 { 20.30 + aconst_null; 20.31 + getfield Field T.f:I; // T does not exist 20.32 + return; 20.33 + } 20.34 + 20.35 + public static Method testPutField:"()V" stack 2 locals 1 { 20.36 + aconst_null; 20.37 + iconst_0; 20.38 + putfield Field T.f:I; // T does not exist 20.39 + return; 20.40 + } 20.41 +}
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/test/compiler/c1/TestUnresolvedFieldMain.java Wed Apr 12 09:03:26 2017 -0700 21.3 @@ -0,0 +1,48 @@ 21.4 +/* 21.5 + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 21.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 21.7 + * 21.8 + * This code is free software; you can redistribute it and/or modify it 21.9 + * under the terms of the GNU General Public License version 2 only, as 21.10 + * published by the Free Software Foundation. 21.11 + * 21.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 21.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 21.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 21.15 + * version 2 for more details (a copy is included in the LICENSE file that 21.16 + * accompanied this code). 21.17 + * 21.18 + * You should have received a copy of the GNU General Public License version 21.19 + * 2 along with this work; if not, write to the Free Software Foundation, 21.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21.21 + * 21.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21.23 + * or visit www.oracle.com if you need additional information or have any 21.24 + * questions.
21.25 + */ 21.26 + 21.27 +/* 21.28 + * @test 21.29 + * @bug 8173373 21.30 + * @compile TestUnresolvedField.jasm 21.31 + * @run main/othervm -XX:TieredStopAtLevel=1 -Xcomp 21.32 + * -XX:CompileCommand=compileonly,compiler.c1.TestUnresolvedField::test* 21.33 + * compiler.c1.TestUnresolvedFieldMain 21.34 + */ 21.35 + 21.36 +package compiler.c1; 21.37 + 21.38 +public class TestUnresolvedFieldMain { 21.39 + public static void main(String[] args) { 21.40 + try { 21.41 + TestUnresolvedField.testGetField(); 21.42 + } catch (java.lang.NoClassDefFoundError error) { 21.43 + // Expected 21.44 + } 21.45 + try { 21.46 + TestUnresolvedField.testPutField(); 21.47 + } catch (java.lang.NoClassDefFoundError error) { 21.48 + // Expected 21.49 + } 21.50 + } 21.51 +}
22.1 --- a/test/compiler/loopopts/TestLoopPeeling.java Mon Mar 27 08:21:39 2017 -0700 22.2 +++ b/test/compiler/loopopts/TestLoopPeeling.java Wed Apr 12 09:03:26 2017 -0700 22.3 @@ -1,5 +1,5 @@ 22.4 /* 22.5 - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 22.6 + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 22.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 22.8 * 22.9 * This code is free software; you can redistribute it and/or modify it 22.10 @@ -23,10 +23,16 @@ 22.11 22.12 /* 22.13 * @test 22.14 - * @bug 8078262 22.15 + * @bug 8078262 8177095 22.16 * @summary Tests correct dominator information after loop peeling. 22.17 - * @run main/othervm -Xcomp -XX:CompileCommand=compileonly,TestLoopPeeling::test* TestLoopPeeling 22.18 + * 22.19 + * @run main/othervm -Xcomp 22.20 + * -XX:CompileCommand=compileonly,compiler.loopopts.TestLoopPeeling::test* 22.21 + * compiler.loopopts.TestLoopPeeling 22.22 */ 22.23 + 22.24 +package compiler.loopopts; 22.25 + 22.26 public class TestLoopPeeling { 22.27 22.28 public int[] array = new int[100]; 22.29 @@ -34,14 +40,16 @@ 22.30 public static void main(String args[]) { 22.31 TestLoopPeeling test = new TestLoopPeeling(); 22.32 try { 22.33 - test.testArrayAccess(0, 1); 22.34 + test.testArrayAccess1(0, 1); 22.35 + test.testArrayAccess2(0); 22.36 + test.testArrayAccess3(0, false); 22.37 test.testArrayAllocation(0, 1); 22.38 } catch (Exception e) { 22.39 // Ignore exceptions 22.40 } 22.41 } 22.42 22.43 - public void testArrayAccess(int index, int inc) { 22.44 + public void testArrayAccess1(int index, int inc) { 22.45 int storeIndex = -1; 22.46 22.47 for (; index < 10; index += inc) { 22.48 @@ -57,7 +65,7 @@ 22.49 22.50 if (index == 42) { 22.51 // This store and the corresponding range check are moved out of the 22.52 - // loop and both used after old loop and the peeled iteration exit. 22.53 + // loop and both used after main loop and the peeled iteration exit.
22.54 // For the peeled iteration, storeIndex is always -1 and the ConvI2L 22.55 // is replaced by TOP. However, the range check is not folded because 22.56 // we don't do the split if optimization in PhaseIdealLoop2. 22.57 @@ -71,6 +79,44 @@ 22.58 } 22.59 } 22.60 22.61 + public int testArrayAccess2(int index) { 22.62 + // Load1 and the corresponding range check are moved out of the loop 22.63 + // and both are used after the main loop and the peeled iteration exit. 22.64 + // For the peeled iteration, storeIndex is always Integer.MIN_VALUE and 22.65 + // for the main loop it is 0. Hence, the merging phi has type int:<=0. 22.66 + // Load1 reads the array at index ConvI2L(CastII(AddI(storeIndex, -1))) 22.67 + // where the CastII is range check dependent and has type int:>=0. 22.68 + // The CastII gets pushed through the AddI and its type is changed to int:>=1 22.69 + // which does not overlap with the input type of storeIndex (int:<=0). 22.70 + // The CastII is replaced by TOP causing a cascade of other eliminations. 22.71 + // Since the control path through the range check CmpU(AddI(storeIndex, -1)) 22.72 + // is not eliminated, the graph is in a corrupted state. We fail once we merge 22.73 + // with the result of Load2 because we get data from a non-dominating region. 22.74 + int storeIndex = Integer.MIN_VALUE; 22.75 + for (; index < 10; ++index) { 22.76 + if (index == 42) { 22.77 + return array[storeIndex-1]; // Load1 22.78 + } 22.79 + storeIndex = 0; 22.80 + } 22.81 + return array[42]; // Load2 22.82 + } 22.83 + 22.84 + public int testArrayAccess3(int index, boolean b) { 22.85 + // Same as testArrayAccess2 but manifests as crash in register allocator.
22.86 + int storeIndex = Integer.MIN_VALUE; 22.87 + for (; index < 10; ++index) { 22.88 + if (b) { 22.89 + return 0; 22.90 + } 22.91 + if (index == 42) { 22.92 + return array[storeIndex-1]; // Load1 22.93 + } 22.94 + storeIndex = 0; 22.95 + } 22.96 + return array[42]; // Load2 22.97 + } 22.98 + 22.99 public byte[] testArrayAllocation(int index, int inc) { 22.100 int allocationCount = -1; 22.101 byte[] result; 22.102 @@ -82,7 +128,7 @@ 22.103 22.104 if (index == 42) { 22.105 // This allocation and the corresponding size check are moved out of the 22.106 - // loop and both used after old loop and the peeled iteration exit. 22.107 + // loop and both used after main loop and the peeled iteration exit. 22.108 // For the peeled iteration, allocationCount is always -1 and the ConvI2L 22.109 // is replaced by TOP. However, the size check is not folded because 22.110 // we don't do the split if optimization in PhaseIdealLoop2.