8049717: expose L1_data_cache_line_size for diagnostic/sanity checks

Fri, 17 Mar 2017 03:39:23 -0700

author
kevinw
date
Fri, 17 Mar 2017 03:39:23 -0700
changeset 8729
402618d5afc9
parent 8728
8119c543f2af
child 8730
4b7ea2e3f901

8049717: expose L1_data_cache_line_size for diagnostic/sanity checks
Summary: Add support for VM_Version::L1_data_cache_line_size().
Reviewed-by: dsimms, kvn, dholmes

src/cpu/sparc/vm/vm_version_sparc.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.hpp file | annotate | diff | comparison | revisions
src/share/vm/prims/jni.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/objectMonitor.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/objectMonitor.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/synchronizer.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/synchronizer.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/vm_version.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/vm_version.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Aug 13 10:44:50 2014 +0200
     1.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Mar 17 03:39:23 2017 -0700
     1.3 @@ -259,6 +259,49 @@
     1.4    // buf is started with ", " or is empty
     1.5    _features_str = strdup(strlen(buf) > 2 ? buf + 2 : buf);
     1.6  
     1.7 +  // There are three 64-bit SPARC families that do not overlap, e.g.,
     1.8 +  // both is_ultra3() and is_sparc64() cannot be true at the same time.
     1.9 +  // Within these families, there can be more than one chip, e.g.,
    1.10 +  // is_T4() and is_T7() machines are also is_niagara().
    1.11 +  if (is_ultra3()) {
    1.12 +    assert(_L1_data_cache_line_size == 0, "overlap with Ultra3 family");
    1.13 +    // Ref: UltraSPARC III Cu Processor
    1.14 +    _L1_data_cache_line_size = 64;
    1.15 +  }
    1.16 +  if (is_niagara()) {
    1.17 +    assert(_L1_data_cache_line_size == 0, "overlap with niagara family");
    1.18 +    // All Niagaras are sun4v, but not all sun4v machines are Niagaras, e.g.,
    1.19 +    // Fujitsu SPARC64 is sun4v, but we don't want it in this block.
    1.20 +    //
    1.21 +    // Ref: UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
    1.22 +    // Appendix F.1.3.1 Cacheable Accesses
    1.23 +    // -> 16-byte L1 cache line size
    1.24 +    //
    1.25 +    // Ref: UltraSPARC T2: A Highly-Threaded, Power-Efficient, SPARC SOC
    1.26 +    // Section III: SPARC Processor Core
    1.27 +    // -> 16-byte L1 cache line size
    1.28 +    //
    1.29 +    // Ref: Oracle's SPARC T4-1, SPARC T4-2, SPARC T4-4, and SPARC T4-1B Server Architecture
    1.30 +    // Section SPARC T4 Processor Cache Architecture
    1.31 +    // -> 32-byte L1 cache line size (the reference no longer states this)
    1.32 +    //
    1.33 +    // XXX - still need a T7 reference here
    1.34 +    //
    1.35 +    if (is_T7()) {  // T7 or newer
    1.36 +      _L1_data_cache_line_size = 64;
    1.37 +    } else if (is_T4()) {  // T4 or newer (until T7)
    1.38 +      _L1_data_cache_line_size = 32;
    1.39 +    } else {  // T1 or newer (until T4)
    1.40 +      _L1_data_cache_line_size = 16;
    1.41 +    }
    1.42 +  }
    1.43 +  if (is_sparc64()) {
    1.44 +    guarantee(_L1_data_cache_line_size == 0, "overlap with SPARC64 family");
    1.45 +    // Ref: Fujitsu SPARC64 VII Processor
    1.46 +    // Section 4 Cache System
    1.47 +    _L1_data_cache_line_size = 64;
    1.48 +  }
    1.49 +
    1.50    // UseVIS is set to the smallest of what hardware supports and what
    1.51    // the command line requires.  I.e., you cannot set UseVIS to 3 on
    1.52    // older UltraSparc which do not support it.
    1.53 @@ -364,6 +407,7 @@
    1.54  
    1.55  #ifndef PRODUCT
    1.56    if (PrintMiscellaneous && Verbose) {
    1.57 +    tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    1.58      tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
    1.59      tty->print("Allocation");
    1.60      if (AllocatePrefetchStyle <= 0) {
     2.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Aug 13 10:44:50 2014 +0200
     2.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 17 03:39:23 2017 -0700
     2.3 @@ -406,6 +406,8 @@
     2.4    _stepping = 0;
     2.5    _cpuFeatures = 0;
     2.6    _logical_processors_per_package = 1;
     2.7 +  // i486 internal cache is both I&D and has a 16-byte line size
     2.8 +  _L1_data_cache_line_size = 16;
     2.9  
    2.10    if (!Use486InstrsOnly) {
    2.11      // Get raw processor info
    2.12 @@ -424,6 +426,7 @@
    2.13        // Logical processors are only available on P4s and above,
    2.14        // and only if hyperthreading is available.
    2.15        _logical_processors_per_package = logical_processor_count();
    2.16 +      _L1_data_cache_line_size = L1_line_size();
    2.17      }
    2.18    }
    2.19  
    2.20 @@ -1034,6 +1037,7 @@
    2.21    if (PrintMiscellaneous && Verbose) {
    2.22      tty->print_cr("Logical CPUs per core: %u",
    2.23                    logical_processors_per_package());
    2.24 +    tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    2.25      tty->print("UseSSE=%d", (int) UseSSE);
    2.26      if (UseAVX > 0) {
    2.27        tty->print("  UseAVX=%d", (int) UseAVX);
     3.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Wed Aug 13 10:44:50 2014 +0200
     3.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Mar 17 03:39:23 2017 -0700
     3.3 @@ -1,5 +1,5 @@
     3.4  /*
     3.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
     3.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     3.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3.8   *
     3.9   * This code is free software; you can redistribute it and/or modify it
    3.10 @@ -595,7 +595,7 @@
    3.11      return (result == 0 ? 1 : result);
    3.12    }
    3.13  
    3.14 -  static intx prefetch_data_size()  {
    3.15 +  static intx L1_line_size()  {
    3.16      intx result = 0;
    3.17      if (is_intel()) {
    3.18        result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
    3.19 @@ -607,6 +607,10 @@
    3.20      return result;
    3.21    }
    3.22  
    3.23 +  static intx prefetch_data_size()  {
    3.24 +    return L1_line_size();
    3.25 +  }
    3.26 +
    3.27    //
    3.28    // Feature identification
    3.29    //
     4.1 --- a/src/share/vm/prims/jni.cpp	Wed Aug 13 10:44:50 2014 +0200
     4.2 +++ b/src/share/vm/prims/jni.cpp	Fri Mar 17 03:39:23 2017 -0700
     4.3 @@ -5129,6 +5129,7 @@
     4.4      run_unit_test(TestKlass_test());
     4.5      run_unit_test(Test_linked_list());
     4.6      run_unit_test(TestChunkedList_test());
     4.7 +    run_unit_test(ObjectMonitor::sanity_checks());
     4.8  #if INCLUDE_VM_STRUCTS
     4.9      run_unit_test(VMStructs::test());
    4.10  #endif
     5.1 --- a/src/share/vm/runtime/objectMonitor.cpp	Wed Aug 13 10:44:50 2014 +0200
     5.2 +++ b/src/share/vm/runtime/objectMonitor.cpp	Fri Mar 17 03:39:23 2017 -0700
     5.3 @@ -2529,6 +2529,10 @@
     5.4    SETKNOB(FastHSSEC) ;
     5.5    #undef SETKNOB
     5.6  
     5.7 +  if (Knob_Verbose) {
     5.8 +    sanity_checks();
     5.9 +  }
    5.10 +
    5.11    if (os::is_MP()) {
    5.12       BackOffMask = (1 << Knob_SpinBackOff) - 1 ;
    5.13       if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ;
    5.14 @@ -2549,6 +2553,66 @@
    5.15    InitDone = 1 ;
    5.16  }
    5.17  
    5.18 +void ObjectMonitor::sanity_checks() {
    5.19 +  int error_cnt = 0;
    5.20 +  int warning_cnt = 0;
    5.21 +  bool verbose = Knob_Verbose != 0 NOT_PRODUCT(|| VerboseInternalVMTests);
    5.22 +
    5.23 +  if (verbose) {
    5.24 +    tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT,
    5.25 +                  sizeof(ObjectMonitor));
    5.26 +  }
    5.27 +
    5.28 +  uint cache_line_size = VM_Version::L1_data_cache_line_size();
    5.29 +  if (verbose) {
    5.30 +    tty->print_cr("INFO: L1_data_cache_line_size=%u", cache_line_size);
    5.31 +  }
    5.32 +
    5.33 +  ObjectMonitor dummy;
    5.34 +  u_char *addr_begin  = (u_char*)&dummy;
    5.35 +  u_char *addr_header = (u_char*)&dummy._header;
    5.36 +  u_char *addr_owner  = (u_char*)&dummy._owner;
    5.37 +
    5.38 +  uint offset_header = (uint)(addr_header - addr_begin);
    5.39 +  if (verbose) tty->print_cr("INFO: offset(_header)=%u", offset_header);
    5.40 +
    5.41 +  uint offset_owner = (uint)(addr_owner - addr_begin);
    5.42 +  if (verbose) tty->print_cr("INFO: offset(_owner)=%u", offset_owner);
    5.43 +
    5.44 +  if ((uint)(addr_header - addr_begin) != 0) {
    5.45 +    tty->print_cr("ERROR: offset(_header) must be zero (0).");
    5.46 +    error_cnt++;
    5.47 +  }
    5.48 +
    5.49 +  if (cache_line_size != 0) {
    5.50 +    // We were able to determine the L1 data cache line size so
    5.51 +    // do some cache line specific sanity checks
    5.52 +
    5.53 +    if ((offset_owner - offset_header) < cache_line_size) {
    5.54 +      tty->print_cr("WARNING: the _header and _owner fields are closer "
    5.55 +                    "than a cache line which permits false sharing.");
    5.56 +      warning_cnt++;
    5.57 +    }
    5.58 +
    5.59 +    if ((sizeof(ObjectMonitor) % cache_line_size) != 0) {
    5.60 +      tty->print_cr("WARNING: ObjectMonitor size is not a multiple of "
    5.61 +                    "a cache line which permits false sharing.");
    5.62 +      warning_cnt++;
    5.63 +    }
    5.64 +  }
    5.65 +
    5.66 +  ObjectSynchronizer::sanity_checks(verbose, cache_line_size, &error_cnt,
    5.67 +                                    &warning_cnt);
    5.68 +
    5.69 +  if (verbose || error_cnt != 0 || warning_cnt != 0) {
    5.70 +    tty->print_cr("INFO: error_cnt=%d", error_cnt);
    5.71 +    tty->print_cr("INFO: warning_cnt=%d", warning_cnt);
    5.72 +  }
    5.73 +
    5.74 +  guarantee(error_cnt == 0,
    5.75 +            "Fatal error(s) found in ObjectMonitor::sanity_checks()");
    5.76 +}
    5.77 +
    5.78  #ifndef PRODUCT
    5.79  void ObjectMonitor::verify() {
    5.80  }
     6.1 --- a/src/share/vm/runtime/objectMonitor.hpp	Wed Aug 13 10:44:50 2014 +0200
     6.2 +++ b/src/share/vm/runtime/objectMonitor.hpp	Fri Mar 17 03:39:23 2017 -0700
     6.3 @@ -189,6 +189,8 @@
     6.4    bool      check(TRAPS);       // true if the thread owns the monitor.
     6.5    void      check_slow(TRAPS);
     6.6    void      clear();
     6.7 +  static void sanity_checks();  // public for -XX:+ExecuteInternalVMTests
     6.8 +                                // in PRODUCT for -XX:SyncKnobs=Verbose=1
     6.9  #ifndef PRODUCT
    6.10    void      verify();
    6.11    void      print();
    6.12 @@ -234,8 +236,6 @@
    6.13  
    6.14    // WARNING: this must be the very first word of ObjectMonitor
    6.15    // This means this class can't use any virtual member functions.
    6.16 -  // TODO-FIXME: assert that offsetof(_header) is 0 or get rid of the
    6.17 -  // implicit 0 offset in emitted code.
    6.18  
    6.19    volatile markOop   _header;       // displaced object header word - mark
    6.20    void*     volatile _object;       // backward object pointer - strong root
     7.1 --- a/src/share/vm/runtime/synchronizer.cpp	Wed Aug 13 10:44:50 2014 +0200
     7.2 +++ b/src/share/vm/runtime/synchronizer.cpp	Fri Mar 17 03:39:23 2017 -0700
     7.3 @@ -437,19 +437,22 @@
     7.4  // Hash Code handling
     7.5  //
     7.6  // Performance concern:
     7.7 -// OrderAccess::storestore() calls release() which STs 0 into the global volatile
     7.8 -// OrderAccess::Dummy variable.  This store is unnecessary for correctness.
     7.9 -// Many threads STing into a common location causes considerable cache migration
    7.10 -// or "sloshing" on large SMP system.  As such, I avoid using OrderAccess::storestore()
    7.11 -// until it's repaired.  In some cases OrderAccess::fence() -- which incurs local
    7.12 -// latency on the executing processor -- is a better choice as it scales on SMP
    7.13 -// systems.  See http://blogs.sun.com/dave/entry/biased_locking_in_hotspot for a
    7.14 -// discussion of coherency costs.  Note that all our current reference platforms
    7.15 -// provide strong ST-ST order, so the issue is moot on IA32, x64, and SPARC.
    7.16 +// OrderAccess::storestore() calls release() which at one time stored 0
    7.17 +// into the global volatile OrderAccess::dummy variable. This store was
    7.18 +// unnecessary for correctness. Many threads storing into a common location
    7.19 +// causes considerable cache migration or "sloshing" on large SMP systems.
    7.20 +// As such, I avoided using OrderAccess::storestore(). In some cases
    7.21 +// OrderAccess::fence() -- which incurs local latency on the executing
    7.22 +// processor -- is a better choice as it scales on SMP systems.
    7.23 +//
    7.24 +// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
    7.25 +// a discussion of coherency costs. Note that all our current reference
    7.26 +// platforms provide strong ST-ST order, so the issue is moot on IA32,
    7.27 +// x64, and SPARC.
    7.28  //
    7.29  // As a general policy we use "volatile" to control compiler-based reordering
    7.30 -// and explicit fences (barriers) to control for architectural reordering performed
    7.31 -// by the CPU(s) or platform.
    7.32 +// and explicit fences (barriers) to control for architectural reordering
    7.33 +// performed by the CPU(s) or platform.
    7.34  
    7.35  struct SharedGlobals {
    7.36      // These are highly shared mostly-read variables.
    7.37 @@ -1636,7 +1639,55 @@
    7.38  }
    7.39  
    7.40  //------------------------------------------------------------------------------
    7.41 -// Non-product code
    7.42 +// Debugging code
    7.43 +
    7.44 +void ObjectSynchronizer::sanity_checks(const bool verbose,
    7.45 +                                       const uint cache_line_size,
    7.46 +                                       int *error_cnt_ptr,
    7.47 +                                       int *warning_cnt_ptr) {
    7.48 +  u_char *addr_begin      = (u_char*)&GVars;
    7.49 +  u_char *addr_stwRandom  = (u_char*)&GVars.stwRandom;
    7.50 +  u_char *addr_hcSequence = (u_char*)&GVars.hcSequence;
    7.51 +
    7.52 +  if (verbose) {
    7.53 +    tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT,
    7.54 +                  sizeof(SharedGlobals));
    7.55 +  }
    7.56 +
    7.57 +  uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin);
    7.58 +  if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom);
    7.59 +
    7.60 +  uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin);
    7.61 +  if (verbose) {
    7.62 +    tty->print_cr("INFO: offset(_hcSequence)=%u", offset_hcSequence);
    7.63 +  }
    7.64 +
    7.65 +  if (cache_line_size != 0) {
    7.66 +    // We were able to determine the L1 data cache line size so
    7.67 +    // do some cache line specific sanity checks
    7.68 +
    7.69 +    if (offset_stwRandom < cache_line_size) {
    7.70 +      tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer "
    7.71 +                    "to the struct beginning than a cache line which permits "
    7.72 +                    "false sharing.");
    7.73 +      (*warning_cnt_ptr)++;
    7.74 +    }
    7.75 +
    7.76 +    if ((offset_hcSequence - offset_stwRandom) < cache_line_size) {
    7.77 +      tty->print_cr("WARNING: the SharedGlobals.stwRandom and "
    7.78 +                    "SharedGlobals.hcSequence fields are closer than a cache "
    7.79 +                    "line which permits false sharing.");
    7.80 +      (*warning_cnt_ptr)++;
    7.81 +    }
    7.82 +
    7.83 +    if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) {
    7.84 +      tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer "
    7.85 +                    "to the struct end than a cache line which permits false "
    7.86 +                    "sharing.");
    7.87 +      (*warning_cnt_ptr)++;
    7.88 +    }
    7.89 +  }
    7.90 +}
    7.91  
    7.92  #ifndef PRODUCT
    7.93  
     8.1 --- a/src/share/vm/runtime/synchronizer.hpp	Wed Aug 13 10:44:50 2014 +0200
     8.2 +++ b/src/share/vm/runtime/synchronizer.hpp	Fri Mar 17 03:39:23 2017 -0700
     8.3 @@ -121,6 +121,9 @@
     8.4    static void oops_do(OopClosure* f);
     8.5  
     8.6    // debugging
     8.7 +  static void sanity_checks(const bool verbose,
     8.8 +                            const unsigned int cache_line_size,
     8.9 +                            int *error_cnt_ptr, int *warning_cnt_ptr);
    8.10    static void verify() PRODUCT_RETURN;
    8.11    static int  verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0;
    8.12  
     9.1 --- a/src/share/vm/runtime/vm_version.cpp	Wed Aug 13 10:44:50 2014 +0200
     9.2 +++ b/src/share/vm/runtime/vm_version.cpp	Fri Mar 17 03:39:23 2017 -0700
     9.3 @@ -50,6 +50,7 @@
     9.4  bool Abstract_VM_Version::_supports_atomic_getadd4 = false;
     9.5  bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
     9.6  unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
     9.7 +unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
     9.8  int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
     9.9  
    9.10  #ifndef HOTSPOT_RELEASE_VERSION
    10.1 --- a/src/share/vm/runtime/vm_version.hpp	Wed Aug 13 10:44:50 2014 +0200
    10.2 +++ b/src/share/vm/runtime/vm_version.hpp	Fri Mar 17 03:39:23 2017 -0700
    10.3 @@ -1,5 +1,5 @@
    10.4  /*
    10.5 - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
    10.6 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
    10.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    10.8   *
    10.9   * This code is free software; you can redistribute it and/or modify it
   10.10 @@ -42,6 +42,7 @@
   10.11    static bool         _supports_atomic_getadd4;
   10.12    static bool         _supports_atomic_getadd8;
   10.13    static unsigned int _logical_processors_per_package;
   10.14 +  static unsigned int _L1_data_cache_line_size;
   10.15    static int          _vm_major_version;
   10.16    static int          _vm_minor_version;
   10.17    static int          _vm_build_number;
   10.18 @@ -114,6 +115,10 @@
   10.19      return _logical_processors_per_package;
   10.20    }
   10.21  
   10.22 +  static unsigned int L1_data_cache_line_size() {
   10.23 +    return _L1_data_cache_line_size;
   10.24 +  }
   10.25 +
   10.26    // Need a space at the end of TLAB for prefetch instructions
   10.27    // which may fault when accessing memory outside of heap.
   10.28    static int reserve_for_allocation_prefetch() {

mercurial