8040140: System.nanoTime() is slow and non-monotonic on OS X

Fri, 25 Apr 2014 07:40:33 +0200

author
sla
date
Fri, 25 Apr 2014 07:40:33 +0200
changeset 6667
917873d2983d
parent 6666
a062c3691003
child 6669
49961f279e24

8040140: System.nanoTime() is slow and non-monotonic on OS X
Reviewed-by: sspitsyn, shade, dholmes, acorn

src/os/bsd/vm/os_bsd.cpp file | annotate | diff | comparison | revisions
src/os/bsd/vm/os_bsd.hpp file | annotate | diff | comparison | revisions
src/os/solaris/vm/os_solaris.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/os.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/os/bsd/vm/os_bsd.cpp	Mon May 05 15:10:43 2014 +0200
     1.2 +++ b/src/os/bsd/vm/os_bsd.cpp	Fri Apr 25 07:40:33 2014 +0200
     1.3 @@ -127,8 +127,12 @@
     1.4  // global variables
     1.5  julong os::Bsd::_physical_memory = 0;
     1.6  
     1.7 -
     1.8 +#ifdef __APPLE__
     1.9 +mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0};
    1.10 +volatile uint64_t         os::Bsd::_max_abstime   = 0;
    1.11 +#else
    1.12  int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL;
    1.13 +#endif
    1.14  pthread_t os::Bsd::_main_thread;
    1.15  int os::Bsd::_page_size = -1;
    1.16  
    1.17 @@ -986,13 +990,15 @@
    1.18    return jlong(time.tv_sec) * 1000  +  jlong(time.tv_usec / 1000);
    1.19  }
    1.20  
    1.21 +#ifndef __APPLE__
    1.22  #ifndef CLOCK_MONOTONIC
    1.23  #define CLOCK_MONOTONIC (1)
    1.24  #endif
    1.25 +#endif
    1.26  
    1.27  #ifdef __APPLE__
    1.28  void os::Bsd::clock_init() {
    1.29 -        // XXXDARWIN: Investigate replacement monotonic clock
    1.30 +  mach_timebase_info(&_timebase_info);
    1.31  }
    1.32  #else
    1.33  void os::Bsd::clock_init() {
    1.34 @@ -1007,10 +1013,38 @@
    1.35  #endif
    1.36  
    1.37  
    1.38 +#ifdef __APPLE__
    1.39 +
    1.40 +jlong os::javaTimeNanos() {
    1.41 +    const uint64_t tm = mach_absolute_time();
    1.42 +    const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom;   // scale raw ticks by the timebase numer/denom
    1.43 +    const uint64_t prev = Bsd::_max_abstime;
    1.44 +    if (now <= prev) {
    1.45 +      return prev;   // same or retrograde time: return the recorded maximum
    1.46 +    }
    1.47 +    const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev);   // install "now" only if the maximum is still "prev"
    1.48 +    assert(obsv >= prev, "invariant");   // Monotonicity
    1.49 +    // If the CAS succeeded then we're done and return "now".
    1.50 +    // If the CAS failed and the observed value "obsv" is >= now then
    1.51 +    // we should return "obsv".  If the CAS failed and now > obsv > prev then
    1.52 +    // some other thread raced this thread and installed a new value, in which case
    1.53 +    // we could either (a) retry the entire operation, (b) retry trying to install now
    1.54 +    // or (c) just return obsv.  We use (c).   No loop is required although in some cases
    1.55 +    // we might discard a higher "now" value in deference to a slightly lower but freshly
    1.56 +    // installed obsv value.   That's entirely benign -- it admits no new orderings compared
    1.57 +    // to (a) or (b) -- and greatly reduces coherence traffic.
    1.58 +    // We might also condition (c) on the magnitude of the delta between obsv and now.
    1.59 +    // Avoiding excessive CAS operations to hot RW locations is critical.
    1.60 +    // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
    1.61 +    return (prev == obsv) ? now : obsv;
    1.62 +}
    1.63 +
    1.64 +#else // __APPLE__
    1.65 +
    1.66  jlong os::javaTimeNanos() {
    1.67    if (Bsd::supports_monotonic_clock()) {
    1.68      struct timespec tp;
    1.69 -    int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp);
    1.70 +    int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp);
    1.71      assert(status == 0, "gettime error");
    1.72      jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
    1.73      return result;
    1.74 @@ -1023,6 +1057,8 @@
    1.75    }
    1.76  }
    1.77  
    1.78 +#endif // __APPLE__
    1.79 +
    1.80  void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
    1.81    if (Bsd::supports_monotonic_clock()) {
    1.82      info_ptr->max_value = ALL_64_BITS;
     2.1 --- a/src/os/bsd/vm/os_bsd.hpp	Mon May 05 15:10:43 2014 +0200
     2.2 +++ b/src/os/bsd/vm/os_bsd.hpp	Fri Apr 25 07:40:33 2014 +0200
     2.3 @@ -58,7 +58,13 @@
     2.4    // For signal flags diagnostics
     2.5    static int sigflags[MAXSIGNUM];
     2.6  
     2.7 +#ifdef __APPLE__
     2.8 +  // mach_absolute_time
     2.9 +  static mach_timebase_info_data_t _timebase_info;
    2.10 +  static volatile uint64_t         _max_abstime;
    2.11 +#else
    2.12    static int (*_clock_gettime)(clockid_t, struct timespec *);
    2.13 +#endif
    2.14  
    2.15    static GrowableArray<int>* _cpu_to_node;
    2.16  
    2.17 @@ -135,11 +141,11 @@
    2.18    static void clock_init(void);
    2.19  
    2.20    static inline bool supports_monotonic_clock() {
    2.21 +#ifdef __APPLE__
    2.22 +    return true;
    2.23 +#else
    2.24      return _clock_gettime != NULL;
    2.25 -  }
    2.26 -
    2.27 -  static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
    2.28 -    return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
    2.29 +#endif
    2.30    }
    2.31  
    2.32    // Stack repair handling
     3.1 --- a/src/os/solaris/vm/os_solaris.cpp	Mon May 05 15:10:43 2014 +0200
     3.2 +++ b/src/os/solaris/vm/os_solaris.cpp	Fri Apr 25 07:40:33 2014 +0200
     3.3 @@ -415,11 +415,7 @@
     3.4  
     3.5  static hrtime_t first_hrtime = 0;
     3.6  static const hrtime_t hrtime_hz = 1000*1000*1000;
     3.7 -const int LOCK_BUSY = 1;
     3.8 -const int LOCK_FREE = 0;
     3.9 -const int LOCK_INVALID = -1;
    3.10  static volatile hrtime_t max_hrtime = 0;
    3.11 -static volatile int max_hrtime_lock = LOCK_FREE;     // Update counter with LSB as lock-in-progress
    3.12  
    3.13  
    3.14  void os::Solaris::initialize_system_info() {
    3.15 @@ -1534,58 +1530,31 @@
    3.16  }
    3.17  
    3.18  
    3.19 -// gethrtime can move backwards if read from one cpu and then a different cpu
    3.20 -// getTimeNanos is guaranteed to not move backward on Solaris
    3.21 -// local spinloop created as faster for a CAS on an int than
    3.22 -// a CAS on a 64bit jlong. Also Atomic::cmpxchg for jlong is not
    3.23 -// supported on sparc v8 or pre supports_cx8 intel boxes.
    3.24 -// oldgetTimeNanos for systems which do not support CAS on 64bit jlong
    3.25 -// i.e. sparc v8 and pre supports_cx8 (i486) intel boxes
    3.26 -inline hrtime_t oldgetTimeNanos() {
    3.27 -  int gotlock = LOCK_INVALID;
    3.28 -  hrtime_t newtime = gethrtime();
    3.29 -
    3.30 -  for (;;) {
    3.31 -// grab lock for max_hrtime
    3.32 -    int curlock = max_hrtime_lock;
    3.33 -    if (curlock & LOCK_BUSY)  continue;
    3.34 -    if (gotlock = Atomic::cmpxchg(LOCK_BUSY, &max_hrtime_lock, LOCK_FREE) != LOCK_FREE) continue;
    3.35 -    if (newtime > max_hrtime) {
    3.36 -      max_hrtime = newtime;
    3.37 -    } else {
    3.38 -      newtime = max_hrtime;
    3.39 -    }
    3.40 -    // release lock
    3.41 -    max_hrtime_lock = LOCK_FREE;
    3.42 -    return newtime;
    3.43 -  }
    3.44 -}
    3.45 -// gethrtime can move backwards if read from one cpu and then a different cpu
    3.46 -// getTimeNanos is guaranteed to not move backward on Solaris
    3.47 +// gethrtime() should be monotonic according to the documentation,
    3.48 +// but some virtualized platforms are known to break this guarantee.
    3.49 +// getTimeNanos() must be guaranteed not to move backwards, so we
    3.50 +// are forced to add a check here.
    3.51  inline hrtime_t getTimeNanos() {
    3.52 -  if (VM_Version::supports_cx8()) {
    3.53 -    const hrtime_t now = gethrtime();
    3.54 -    // Use atomic long load since 32-bit x86 uses 2 registers to keep long.
    3.55 -    const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime);
    3.56 -    if (now <= prev)  return prev;   // same or retrograde time;
    3.57 -    const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
    3.58 -    assert(obsv >= prev, "invariant");   // Monotonicity
    3.59 -    // If the CAS succeeded then we're done and return "now".
    3.60 -    // If the CAS failed and the observed value "obs" is >= now then
    3.61 -    // we should return "obs".  If the CAS failed and now > obs > prv then
    3.62 -    // some other thread raced this thread and installed a new value, in which case
    3.63 -    // we could either (a) retry the entire operation, (b) retry trying to install now
    3.64 -    // or (c) just return obs.  We use (c).   No loop is required although in some cases
    3.65 -    // we might discard a higher "now" value in deference to a slightly lower but freshly
    3.66 -    // installed obs value.   That's entirely benign -- it admits no new orderings compared
    3.67 -    // to (a) or (b) -- and greatly reduces coherence traffic.
    3.68 -    // We might also condition (c) on the magnitude of the delta between obs and now.
    3.69 -    // Avoiding excessive CAS operations to hot RW locations is critical.
    3.70 -    // See http://blogs.sun.com/dave/entry/cas_and_cache_trivia_invalidate
    3.71 -    return (prev == obsv) ? now : obsv ;
    3.72 -  } else {
    3.73 -    return oldgetTimeNanos();
    3.74 -  }
    3.75 +  const hrtime_t now = gethrtime();
    3.76 +  const hrtime_t prev = max_hrtime;
    3.77 +  if (now <= prev) {
    3.78 +    return prev;   // same or retrograde time: return the recorded maximum
    3.79 +  }
    3.80 +  const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);   // install "now" only if the maximum is still "prev"
    3.81 +  assert(obsv >= prev, "invariant");   // Monotonicity
    3.82 +  // If the CAS succeeded then we're done and return "now".
    3.83 +  // If the CAS failed and the observed value "obsv" is >= now then
    3.84 +  // we should return "obsv".  If the CAS failed and now > obsv > prev then
    3.85 +  // some other thread raced this thread and installed a new value, in which case
    3.86 +  // we could either (a) retry the entire operation, (b) retry trying to install now
    3.87 +  // or (c) just return obsv.  We use (c).   No loop is required although in some cases
    3.88 +  // we might discard a higher "now" value in deference to a slightly lower but freshly
    3.89 +  // installed obsv value.   That's entirely benign -- it admits no new orderings compared
    3.90 +  // to (a) or (b) -- and greatly reduces coherence traffic.
    3.91 +  // We might also condition (c) on the magnitude of the delta between obsv and now.
    3.92 +  // Avoiding excessive CAS operations to hot RW locations is critical.
    3.93 +  // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
    3.94 +  return (prev == obsv) ? now : obsv;
    3.95  }
    3.96  
    3.97  // Time since start-up in seconds to a fine granularity.
     4.1 --- a/src/share/vm/runtime/os.hpp	Mon May 05 15:10:43 2014 +0200
     4.2 +++ b/src/share/vm/runtime/os.hpp	Fri Apr 25 07:40:33 2014 +0200
     4.3 @@ -48,6 +48,9 @@
     4.4  #ifdef TARGET_OS_FAMILY_bsd
     4.5  # include "jvm_bsd.h"
     4.6  # include <setjmp.h>
     4.7 +# ifdef __APPLE__
     4.8 +#  include <mach/mach_time.h>
     4.9 +# endif
    4.10  #endif
    4.11  
    4.12  class AgentLibrary;

mercurial