1.1 --- a/src/share/vm/runtime/synchronizer.cpp Wed Aug 13 10:44:50 2014 +0200 1.2 +++ b/src/share/vm/runtime/synchronizer.cpp Fri Mar 17 03:39:23 2017 -0700 1.3 @@ -437,19 +437,22 @@ 1.4 // Hash Code handling 1.5 // 1.6 // Performance concern: 1.7 -// OrderAccess::storestore() calls release() which STs 0 into the global volatile 1.8 -// OrderAccess::Dummy variable. This store is unnecessary for correctness. 1.9 -// Many threads STing into a common location causes considerable cache migration 1.10 -// or "sloshing" on large SMP system. As such, I avoid using OrderAccess::storestore() 1.11 -// until it's repaired. In some cases OrderAccess::fence() -- which incurs local 1.12 -// latency on the executing processor -- is a better choice as it scales on SMP 1.13 -// systems. See http://blogs.sun.com/dave/entry/biased_locking_in_hotspot for a 1.14 -// discussion of coherency costs. Note that all our current reference platforms 1.15 -// provide strong ST-ST order, so the issue is moot on IA32, x64, and SPARC. 1.16 +// OrderAccess::storestore() calls release() which at one time stored 0 1.17 +// into the global volatile OrderAccess::dummy variable. This store was 1.18 +// unnecessary for correctness. Many threads storing into a common location 1.19 +// causes considerable cache migration or "sloshing" on large SMP systems. 1.20 +// As such, I avoided using OrderAccess::storestore(). In some cases 1.21 +// OrderAccess::fence() -- which incurs local latency on the executing 1.22 +// processor -- is a better choice as it scales on SMP systems. 1.23 +// 1.24 +// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for 1.25 +// a discussion of coherency costs. Note that all our current reference 1.26 +// platforms provide strong ST-ST order, so the issue is moot on IA32, 1.27 +// x64, and SPARC. 1.28 // 1.29 // As a general policy we use "volatile" to control compiler-based reordering 1.30 -// and explicit fences (barriers) to control for architectural reordering performed 1.31 -// by the CPU(s) or platform. 1.32 +// and explicit fences (barriers) to control for architectural reordering 1.33 +// performed by the CPU(s) or platform. 1.34 1.35 struct SharedGlobals { 1.36 // These are highly shared mostly-read variables. 1.37 @@ -1636,7 +1639,55 @@ 1.38 } 1.39 1.40 //------------------------------------------------------------------------------ 1.41 -// Non-product code 1.42 +// Debugging code 1.43 + 1.44 +void ObjectSynchronizer::sanity_checks(const bool verbose, 1.45 + const uint cache_line_size, 1.46 + int *error_cnt_ptr, 1.47 + int *warning_cnt_ptr) { 1.48 + u_char *addr_begin = (u_char*)&GVars; 1.49 + u_char *addr_stwRandom = (u_char*)&GVars.stwRandom; 1.50 + u_char *addr_hcSequence = (u_char*)&GVars.hcSequence; 1.51 + 1.52 + if (verbose) { 1.53 + tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT, 1.54 + sizeof(SharedGlobals)); 1.55 + } 1.56 + 1.57 + uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin); 1.58 + if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom); 1.59 + 1.60 + uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin); 1.61 + if (verbose) { 1.62 + tty->print_cr("INFO: offset(_hcSequence)=%u", offset_hcSequence); 1.63 + } 1.64 + 1.65 + if (cache_line_size != 0) { 1.66 + // We were able to determine the L1 data cache line size so 1.67 + // do some cache line specific sanity checks 1.68 + 1.69 + if (offset_stwRandom < cache_line_size) { 1.70 + tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer " 1.71 + "to the struct beginning than a cache line which permits " 1.72 + "false sharing."); 1.73 + (*warning_cnt_ptr)++; 1.74 + } 1.75 + 1.76 + if ((offset_hcSequence - offset_stwRandom) < cache_line_size) { 1.77 + tty->print_cr("WARNING: the SharedGlobals.stwRandom and " 1.78 + "SharedGlobals.hcSequence fields are closer than a cache " 1.79 + "line which permits false sharing."); 1.80 + (*warning_cnt_ptr)++; 1.81 + } 1.82 + 1.83 + if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) { 1.84 + tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer " 1.85 + "to the struct end than a cache line which permits false " 1.86 + "sharing."); 1.87 + (*warning_cnt_ptr)++; 1.88 + } 1.89 + } 1.90 +} 1.91 1.92 #ifndef PRODUCT 1.93