Fri, 17 Mar 2017 03:39:23 -0700
8049717: expose L1_data_cache_line_size for diagnostic/sanity checks
Summary: Add support for VM_Version::L1_data_cache_line_size().
Reviewed-by: dsimms, kvn, dholmes
1.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Aug 13 10:44:50 2014 +0200 1.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Mar 17 03:39:23 2017 -0700 1.3 @@ -259,6 +259,49 @@ 1.4 // buf is started with ", " or is empty 1.5 _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf); 1.6 1.7 + // There are three 64-bit SPARC families that do not overlap, e.g., 1.8 + // both is_ultra3() and is_sparc64() cannot be true at the same time. 1.9 + // Within these families, there can be more than one chip, e.g., 1.10 + // is_T4() and is_T7() machines are also is_niagara(). 1.11 + if (is_ultra3()) { 1.12 + assert(_L1_data_cache_line_size == 0, "overlap with Ultra3 family"); 1.13 + // Ref: UltraSPARC III Cu Processor 1.14 + _L1_data_cache_line_size = 64; 1.15 + } 1.16 + if (is_niagara()) { 1.17 + assert(_L1_data_cache_line_size == 0, "overlap with niagara family"); 1.18 + // All Niagaras are sun4v's, but not all sun4v's are Niagaras, e.g., 1.19 + // Fujitsu SPARC64 is sun4v, but we don't want it in this block. 
1.20 + // 1.21 + // Ref: UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005 1.22 + // Appendix F.1.3.1 Cacheable Accesses 1.23 + // -> 16-byte L1 cache line size 1.24 + // 1.25 + // Ref: UltraSPARC T2: A Highly-Threaded, Power-Efficient, SPARC SOC 1.26 + // Section III: SPARC Processor Core 1.27 + // -> 16-byte L1 cache line size 1.28 + // 1.29 + // Ref: Oracle's SPARC T4-1, SPARC T4-2, SPARC T4-4, and SPARC T4-1B Server Architecture 1.30 + // Section SPARC T4 Processor Cache Architecture 1.31 + // -> 32-byte L1 cache line size (no longer see that info on this ref) 1.32 + // 1.33 + // XXX - still need a T7 reference here 1.34 + // 1.35 + if (is_T7()) { // T7 or newer 1.36 + _L1_data_cache_line_size = 64; 1.37 + } else if (is_T4()) { // T4 or newer (until T7) 1.38 + _L1_data_cache_line_size = 32; 1.39 + } else { // T1 or newer (until T4) 1.40 + _L1_data_cache_line_size = 16; 1.41 + } 1.42 + } 1.43 + if (is_sparc64()) { 1.44 + guarantee(_L1_data_cache_line_size == 0, "overlap with SPARC64 family"); 1.45 + // Ref: Fujitsu SPARC64 VII Processor 1.46 + // Section 4 Cache System 1.47 + _L1_data_cache_line_size = 64; 1.48 + } 1.49 + 1.50 // UseVIS is set to the smallest of what hardware supports and what 1.51 // the command line requires. I.e., you cannot set UseVIS to 3 on 1.52 // older UltraSparc which do not support it. 1.53 @@ -364,6 +407,7 @@ 1.54 1.55 #ifndef PRODUCT 1.56 if (PrintMiscellaneous && Verbose) { 1.57 + tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1.58 tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size()); 1.59 tty->print("Allocation"); 1.60 if (AllocatePrefetchStyle <= 0) {
2.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Wed Aug 13 10:44:50 2014 +0200 2.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri Mar 17 03:39:23 2017 -0700 2.3 @@ -406,6 +406,8 @@ 2.4 _stepping = 0; 2.5 _cpuFeatures = 0; 2.6 _logical_processors_per_package = 1; 2.7 + // i486 internal cache is both I&D and has a 16-byte line size 2.8 + _L1_data_cache_line_size = 16; 2.9 2.10 if (!Use486InstrsOnly) { 2.11 // Get raw processor info 2.12 @@ -424,6 +426,7 @@ 2.13 // Logical processors are only available on P4s and above, 2.14 // and only if hyperthreading is available. 2.15 _logical_processors_per_package = logical_processor_count(); 2.16 + _L1_data_cache_line_size = L1_line_size(); 2.17 } 2.18 } 2.19 2.20 @@ -1034,6 +1037,7 @@ 2.21 if (PrintMiscellaneous && Verbose) { 2.22 tty->print_cr("Logical CPUs per core: %u", 2.23 logical_processors_per_package()); 2.24 + tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 2.25 tty->print("UseSSE=%d", (int) UseSSE); 2.26 if (UseAVX > 0) { 2.27 tty->print(" UseAVX=%d", (int) UseAVX);
3.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Wed Aug 13 10:44:50 2014 +0200 3.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Fri Mar 17 03:39:23 2017 -0700 3.3 @@ -1,5 +1,5 @@ 3.4 /* 3.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 3.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 3.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.8 * 3.9 * This code is free software; you can redistribute it and/or modify it 3.10 @@ -595,7 +595,7 @@ 3.11 return (result == 0 ? 1 : result); 3.12 } 3.13 3.14 - static intx prefetch_data_size() { 3.15 + static intx L1_line_size() { 3.16 intx result = 0; 3.17 if (is_intel()) { 3.18 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3.19 @@ -607,6 +607,10 @@ 3.20 return result; 3.21 } 3.22 3.23 + static intx prefetch_data_size() { 3.24 + return L1_line_size(); 3.25 + } 3.26 + 3.27 // 3.28 // Feature identification 3.29 //
4.1 --- a/src/share/vm/prims/jni.cpp Wed Aug 13 10:44:50 2014 +0200 4.2 +++ b/src/share/vm/prims/jni.cpp Fri Mar 17 03:39:23 2017 -0700 4.3 @@ -5129,6 +5129,7 @@ 4.4 run_unit_test(TestKlass_test()); 4.5 run_unit_test(Test_linked_list()); 4.6 run_unit_test(TestChunkedList_test()); 4.7 + run_unit_test(ObjectMonitor::sanity_checks()); 4.8 #if INCLUDE_VM_STRUCTS 4.9 run_unit_test(VMStructs::test()); 4.10 #endif
5.1 --- a/src/share/vm/runtime/objectMonitor.cpp Wed Aug 13 10:44:50 2014 +0200 5.2 +++ b/src/share/vm/runtime/objectMonitor.cpp Fri Mar 17 03:39:23 2017 -0700 5.3 @@ -2529,6 +2529,10 @@ 5.4 SETKNOB(FastHSSEC) ; 5.5 #undef SETKNOB 5.6 5.7 + if (Knob_Verbose) { 5.8 + sanity_checks(); 5.9 + } 5.10 + 5.11 if (os::is_MP()) { 5.12 BackOffMask = (1 << Knob_SpinBackOff) - 1 ; 5.13 if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ; 5.14 @@ -2549,6 +2553,66 @@ 5.15 InitDone = 1 ; 5.16 } 5.17 5.18 +void ObjectMonitor::sanity_checks() { 5.19 + int error_cnt = 0; 5.20 + int warning_cnt = 0; 5.21 + bool verbose = Knob_Verbose != 0 NOT_PRODUCT(|| VerboseInternalVMTests); 5.22 + 5.23 + if (verbose) { 5.24 + tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT, 5.25 + sizeof(ObjectMonitor)); 5.26 + } 5.27 + 5.28 + uint cache_line_size = VM_Version::L1_data_cache_line_size(); 5.29 + if (verbose) { 5.30 + tty->print_cr("INFO: L1_data_cache_line_size=%u", cache_line_size); 5.31 + } 5.32 + 5.33 + ObjectMonitor dummy; 5.34 + u_char *addr_begin = (u_char*)&dummy; 5.35 + u_char *addr_header = (u_char*)&dummy._header; 5.36 + u_char *addr_owner = (u_char*)&dummy._owner; 5.37 + 5.38 + uint offset_header = (uint)(addr_header - addr_begin); 5.39 + if (verbose) tty->print_cr("INFO: offset(_header)=%u", offset_header); 5.40 + 5.41 + uint offset_owner = (uint)(addr_owner - addr_begin); 5.42 + if (verbose) tty->print_cr("INFO: offset(_owner)=%u", offset_owner); 5.43 + 5.44 + if ((uint)(addr_header - addr_begin) != 0) { 5.45 + tty->print_cr("ERROR: offset(_header) must be zero (0)."); 5.46 + error_cnt++; 5.47 + } 5.48 + 5.49 + if (cache_line_size != 0) { 5.50 + // We were able to determine the L1 data cache line size so 5.51 + // do some cache line specific sanity checks 5.52 + 5.53 + if ((offset_owner - offset_header) < cache_line_size) { 5.54 + tty->print_cr("WARNING: the _header and _owner fields are closer " 5.55 + "than a cache line which permits false sharing."); 5.56 
+ warning_cnt++; 5.57 + } 5.58 + 5.59 + if ((sizeof(ObjectMonitor) % cache_line_size) != 0) { 5.60 + tty->print_cr("WARNING: ObjectMonitor size is not a multiple of " 5.61 + "a cache line which permits false sharing."); 5.62 + warning_cnt++; 5.63 + } 5.64 + } 5.65 + 5.66 + ObjectSynchronizer::sanity_checks(verbose, cache_line_size, &error_cnt, 5.67 + &warning_cnt); 5.68 + 5.69 + if (verbose || error_cnt != 0 || warning_cnt != 0) { 5.70 + tty->print_cr("INFO: error_cnt=%d", error_cnt); 5.71 + tty->print_cr("INFO: warning_cnt=%d", warning_cnt); 5.72 + } 5.73 + 5.74 + guarantee(error_cnt == 0, 5.75 + "Fatal error(s) found in ObjectMonitor::sanity_checks()"); 5.76 +} 5.77 + 5.78 #ifndef PRODUCT 5.79 void ObjectMonitor::verify() { 5.80 }
6.1 --- a/src/share/vm/runtime/objectMonitor.hpp Wed Aug 13 10:44:50 2014 +0200 6.2 +++ b/src/share/vm/runtime/objectMonitor.hpp Fri Mar 17 03:39:23 2017 -0700 6.3 @@ -189,6 +189,8 @@ 6.4 bool check(TRAPS); // true if the thread owns the monitor. 6.5 void check_slow(TRAPS); 6.6 void clear(); 6.7 + static void sanity_checks(); // public for -XX:+ExecuteInternalVMTests 6.8 + // in PRODUCT for -XX:SyncKnobs=Verbose=1 6.9 #ifndef PRODUCT 6.10 void verify(); 6.11 void print(); 6.12 @@ -234,8 +236,6 @@ 6.13 6.14 // WARNING: this must be the very first word of ObjectMonitor 6.15 // This means this class can't use any virtual member functions. 6.16 - // TODO-FIXME: assert that offsetof(_header) is 0 or get rid of the 6.17 - // implicit 0 offset in emitted code. 6.18 6.19 volatile markOop _header; // displaced object header word - mark 6.20 void* volatile _object; // backward object pointer - strong root
7.1 --- a/src/share/vm/runtime/synchronizer.cpp Wed Aug 13 10:44:50 2014 +0200 7.2 +++ b/src/share/vm/runtime/synchronizer.cpp Fri Mar 17 03:39:23 2017 -0700 7.3 @@ -437,19 +437,22 @@ 7.4 // Hash Code handling 7.5 // 7.6 // Performance concern: 7.7 -// OrderAccess::storestore() calls release() which STs 0 into the global volatile 7.8 -// OrderAccess::Dummy variable. This store is unnecessary for correctness. 7.9 -// Many threads STing into a common location causes considerable cache migration 7.10 -// or "sloshing" on large SMP system. As such, I avoid using OrderAccess::storestore() 7.11 -// until it's repaired. In some cases OrderAccess::fence() -- which incurs local 7.12 -// latency on the executing processor -- is a better choice as it scales on SMP 7.13 -// systems. See http://blogs.sun.com/dave/entry/biased_locking_in_hotspot for a 7.14 -// discussion of coherency costs. Note that all our current reference platforms 7.15 -// provide strong ST-ST order, so the issue is moot on IA32, x64, and SPARC. 7.16 +// OrderAccess::storestore() calls release() which at one time stored 0 7.17 +// into the global volatile OrderAccess::dummy variable. This store was 7.18 +// unnecessary for correctness. Many threads storing into a common location 7.19 +// causes considerable cache migration or "sloshing" on large SMP systems. 7.20 +// As such, I avoided using OrderAccess::storestore(). In some cases 7.21 +// OrderAccess::fence() -- which incurs local latency on the executing 7.22 +// processor -- is a better choice as it scales on SMP systems. 7.23 +// 7.24 +// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for 7.25 +// a discussion of coherency costs. Note that all our current reference 7.26 +// platforms provide strong ST-ST order, so the issue is moot on IA32, 7.27 +// x64, and SPARC. 
7.28 // 7.29 // As a general policy we use "volatile" to control compiler-based reordering 7.30 -// and explicit fences (barriers) to control for architectural reordering performed 7.31 -// by the CPU(s) or platform. 7.32 +// and explicit fences (barriers) to control for architectural reordering 7.33 +// performed by the CPU(s) or platform. 7.34 7.35 struct SharedGlobals { 7.36 // These are highly shared mostly-read variables. 7.37 @@ -1636,7 +1639,55 @@ 7.38 } 7.39 7.40 //------------------------------------------------------------------------------ 7.41 -// Non-product code 7.42 +// Debugging code 7.43 + 7.44 +void ObjectSynchronizer::sanity_checks(const bool verbose, 7.45 + const uint cache_line_size, 7.46 + int *error_cnt_ptr, 7.47 + int *warning_cnt_ptr) { 7.48 + u_char *addr_begin = (u_char*)&GVars; 7.49 + u_char *addr_stwRandom = (u_char*)&GVars.stwRandom; 7.50 + u_char *addr_hcSequence = (u_char*)&GVars.hcSequence; 7.51 + 7.52 + if (verbose) { 7.53 + tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT, 7.54 + sizeof(SharedGlobals)); 7.55 + } 7.56 + 7.57 + uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin); 7.58 + if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom); 7.59 + 7.60 + uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin); 7.61 + if (verbose) { 7.62 + tty->print_cr("INFO: offset(_hcSequence)=%u", offset_hcSequence); 7.63 + } 7.64 + 7.65 + if (cache_line_size != 0) { 7.66 + // We were able to determine the L1 data cache line size so 7.67 + // do some cache line specific sanity checks 7.68 + 7.69 + if (offset_stwRandom < cache_line_size) { 7.70 + tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer " 7.71 + "to the struct beginning than a cache line which permits " 7.72 + "false sharing."); 7.73 + (*warning_cnt_ptr)++; 7.74 + } 7.75 + 7.76 + if ((offset_hcSequence - offset_stwRandom) < cache_line_size) { 7.77 + tty->print_cr("WARNING: the SharedGlobals.stwRandom and " 7.78 + 
"SharedGlobals.hcSequence fields are closer than a cache " 7.79 + "line which permits false sharing."); 7.80 + (*warning_cnt_ptr)++; 7.81 + } 7.82 + 7.83 + if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) { 7.84 + tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer " 7.85 + "to the struct end than a cache line which permits false " 7.86 + "sharing."); 7.87 + (*warning_cnt_ptr)++; 7.88 + } 7.89 + } 7.90 +} 7.91 7.92 #ifndef PRODUCT 7.93
8.1 --- a/src/share/vm/runtime/synchronizer.hpp Wed Aug 13 10:44:50 2014 +0200 8.2 +++ b/src/share/vm/runtime/synchronizer.hpp Fri Mar 17 03:39:23 2017 -0700 8.3 @@ -121,6 +121,9 @@ 8.4 static void oops_do(OopClosure* f); 8.5 8.6 // debugging 8.7 + static void sanity_checks(const bool verbose, 8.8 + const unsigned int cache_line_size, 8.9 + int *error_cnt_ptr, int *warning_cnt_ptr); 8.10 static void verify() PRODUCT_RETURN; 8.11 static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; 8.12
9.1 --- a/src/share/vm/runtime/vm_version.cpp Wed Aug 13 10:44:50 2014 +0200 9.2 +++ b/src/share/vm/runtime/vm_version.cpp Fri Mar 17 03:39:23 2017 -0700 9.3 @@ -50,6 +50,7 @@ 9.4 bool Abstract_VM_Version::_supports_atomic_getadd4 = false; 9.5 bool Abstract_VM_Version::_supports_atomic_getadd8 = false; 9.6 unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U; 9.7 +unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0; 9.8 int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0; 9.9 9.10 #ifndef HOTSPOT_RELEASE_VERSION
10.1 --- a/src/share/vm/runtime/vm_version.hpp Wed Aug 13 10:44:50 2014 +0200 10.2 +++ b/src/share/vm/runtime/vm_version.hpp Fri Mar 17 03:39:23 2017 -0700 10.3 @@ -1,5 +1,5 @@ 10.4 /* 10.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 10.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 10.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 10.8 * 10.9 * This code is free software; you can redistribute it and/or modify it 10.10 @@ -42,6 +42,7 @@ 10.11 static bool _supports_atomic_getadd4; 10.12 static bool _supports_atomic_getadd8; 10.13 static unsigned int _logical_processors_per_package; 10.14 + static unsigned int _L1_data_cache_line_size; 10.15 static int _vm_major_version; 10.16 static int _vm_minor_version; 10.17 static int _vm_build_number; 10.18 @@ -114,6 +115,10 @@ 10.19 return _logical_processors_per_package; 10.20 } 10.21 10.22 + static unsigned int L1_data_cache_line_size() { 10.23 + return _L1_data_cache_line_size; 10.24 + } 10.25 + 10.26 // Need a space at the end of TLAB for prefetch instructions 10.27 // which may fault when accessing memory outside of heap. 10.28 static int reserve_for_allocation_prefetch() {