Fri, 25 Apr 2014 07:40:33 +0200
8040140: System.nanoTime() is slow and non-monotonic on OS X
Reviewed-by: sspitsyn, shade, dholmes, acorn
1.1 --- a/src/os/bsd/vm/os_bsd.cpp Mon May 05 15:10:43 2014 +0200 1.2 +++ b/src/os/bsd/vm/os_bsd.cpp Fri Apr 25 07:40:33 2014 +0200 1.3 @@ -127,8 +127,12 @@ 1.4 // global variables 1.5 julong os::Bsd::_physical_memory = 0; 1.6 1.7 - 1.8 +#ifdef __APPLE__ 1.9 +mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0}; 1.10 +volatile uint64_t os::Bsd::_max_abstime = 0; 1.11 +#else 1.12 int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL; 1.13 +#endif 1.14 pthread_t os::Bsd::_main_thread; 1.15 int os::Bsd::_page_size = -1; 1.16 1.17 @@ -986,13 +990,15 @@ 1.18 return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000); 1.19 } 1.20 1.21 +#ifndef __APPLE__ 1.22 #ifndef CLOCK_MONOTONIC 1.23 #define CLOCK_MONOTONIC (1) 1.24 #endif 1.25 +#endif 1.26 1.27 #ifdef __APPLE__ 1.28 void os::Bsd::clock_init() { 1.29 - // XXXDARWIN: Investigate replacement monotonic clock 1.30 + mach_timebase_info(&_timebase_info); 1.31 } 1.32 #else 1.33 void os::Bsd::clock_init() { 1.34 @@ -1007,10 +1013,38 @@ 1.35 #endif 1.36 1.37 1.38 +#ifdef __APPLE__ 1.39 + 1.40 +jlong os::javaTimeNanos() { 1.41 + const uint64_t tm = mach_absolute_time(); 1.42 + const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom; 1.43 + const uint64_t prev = Bsd::_max_abstime; 1.44 + if (now <= prev) { 1.45 + return prev; // same or retrograde time; 1.46 + } 1.47 + const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev); 1.48 + assert(obsv >= prev, "invariant"); // Monotonicity 1.49 + // If the CAS succeeded then we're done and return "now". 1.50 + // If the CAS failed and the observed value "obsv" is >= now then 1.51 + // we should return "obsv". If the CAS failed and now > obsv > prev then 1.52 + // some other thread raced this thread and installed a new value, in which case 1.53 + // we could either (a) retry the entire operation, (b) retry trying to install now 1.54 + // or (c) just return obsv. We use (c). 
No loop is required although in some cases 1.55 + // we might discard a higher "now" value in deference to a slightly lower but freshly 1.56 + // installed obsv value. That's entirely benign -- it admits no new orderings compared 1.57 + // to (a) or (b) -- and greatly reduces coherence traffic. 1.58 + // We might also condition (c) on the magnitude of the delta between obsv and now. 1.59 + // Avoiding excessive CAS operations to hot RW locations is critical. 1.60 + // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate 1.61 + return (prev == obsv) ? now : obsv; 1.62 +} 1.63 + 1.64 +#else // __APPLE__ 1.65 + 1.66 jlong os::javaTimeNanos() { 1.67 if (Bsd::supports_monotonic_clock()) { 1.68 struct timespec tp; 1.69 - int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp); 1.70 + int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp); 1.71 assert(status == 0, "gettime error"); 1.72 jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec); 1.73 return result; 1.74 @@ -1023,6 +1057,8 @@ 1.75 } 1.76 } 1.77 1.78 +#endif // __APPLE__ 1.79 + 1.80 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { 1.81 if (Bsd::supports_monotonic_clock()) { 1.82 info_ptr->max_value = ALL_64_BITS;
2.1 --- a/src/os/bsd/vm/os_bsd.hpp Mon May 05 15:10:43 2014 +0200 2.2 +++ b/src/os/bsd/vm/os_bsd.hpp Fri Apr 25 07:40:33 2014 +0200 2.3 @@ -58,7 +58,13 @@ 2.4 // For signal flags diagnostics 2.5 static int sigflags[MAXSIGNUM]; 2.6 2.7 +#ifdef __APPLE__ 2.8 + // mach_absolute_time 2.9 + static mach_timebase_info_data_t _timebase_info; 2.10 + static volatile uint64_t _max_abstime; 2.11 +#else 2.12 static int (*_clock_gettime)(clockid_t, struct timespec *); 2.13 +#endif 2.14 2.15 static GrowableArray<int>* _cpu_to_node; 2.16 2.17 @@ -135,11 +141,11 @@ 2.18 static void clock_init(void); 2.19 2.20 static inline bool supports_monotonic_clock() { 2.21 +#ifdef __APPLE__ 2.22 + return true; 2.23 +#else 2.24 return _clock_gettime != NULL; 2.25 - } 2.26 - 2.27 - static int clock_gettime(clockid_t clock_id, struct timespec *tp) { 2.28 - return _clock_gettime ? _clock_gettime(clock_id, tp) : -1; 2.29 +#endif 2.30 } 2.31 2.32 // Stack repair handling
3.1 --- a/src/os/solaris/vm/os_solaris.cpp Mon May 05 15:10:43 2014 +0200 3.2 +++ b/src/os/solaris/vm/os_solaris.cpp Fri Apr 25 07:40:33 2014 +0200 3.3 @@ -415,11 +415,7 @@ 3.4 3.5 static hrtime_t first_hrtime = 0; 3.6 static const hrtime_t hrtime_hz = 1000*1000*1000; 3.7 -const int LOCK_BUSY = 1; 3.8 -const int LOCK_FREE = 0; 3.9 -const int LOCK_INVALID = -1; 3.10 static volatile hrtime_t max_hrtime = 0; 3.11 -static volatile int max_hrtime_lock = LOCK_FREE; // Update counter with LSB as lock-in-progress 3.12 3.13 3.14 void os::Solaris::initialize_system_info() { 3.15 @@ -1534,58 +1530,31 @@ 3.16 } 3.17 3.18 3.19 -// gethrtime can move backwards if read from one cpu and then a different cpu 3.20 -// getTimeNanos is guaranteed to not move backward on Solaris 3.21 -// local spinloop created as faster for a CAS on an int than 3.22 -// a CAS on a 64bit jlong. Also Atomic::cmpxchg for jlong is not 3.23 -// supported on sparc v8 or pre supports_cx8 intel boxes. 3.24 -// oldgetTimeNanos for systems which do not support CAS on 64bit jlong 3.25 -// i.e. sparc v8 and pre supports_cx8 (i486) intel boxes 3.26 -inline hrtime_t oldgetTimeNanos() { 3.27 - int gotlock = LOCK_INVALID; 3.28 - hrtime_t newtime = gethrtime(); 3.29 - 3.30 - for (;;) { 3.31 -// grab lock for max_hrtime 3.32 - int curlock = max_hrtime_lock; 3.33 - if (curlock & LOCK_BUSY) continue; 3.34 - if (gotlock = Atomic::cmpxchg(LOCK_BUSY, &max_hrtime_lock, LOCK_FREE) != LOCK_FREE) continue; 3.35 - if (newtime > max_hrtime) { 3.36 - max_hrtime = newtime; 3.37 - } else { 3.38 - newtime = max_hrtime; 3.39 - } 3.40 - // release lock 3.41 - max_hrtime_lock = LOCK_FREE; 3.42 - return newtime; 3.43 - } 3.44 -} 3.45 -// gethrtime can move backwards if read from one cpu and then a different cpu 3.46 -// getTimeNanos is guaranteed to not move backward on Solaris 3.47 +// gethrtime() should be monotonic according to the documentation, 3.48 +// but some virtualized platforms are known to break this guarantee. 
3.49 +// getTimeNanos() must be guaranteed not to move backwards, so we 3.50 +// are forced to add a check here. 3.51 inline hrtime_t getTimeNanos() { 3.52 - if (VM_Version::supports_cx8()) { 3.53 - const hrtime_t now = gethrtime(); 3.54 - // Use atomic long load since 32-bit x86 uses 2 registers to keep long. 3.55 - const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime); 3.56 - if (now <= prev) return prev; // same or retrograde time; 3.57 - const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev); 3.58 - assert(obsv >= prev, "invariant"); // Monotonicity 3.59 - // If the CAS succeeded then we're done and return "now". 3.60 - // If the CAS failed and the observed value "obs" is >= now then 3.61 - // we should return "obs". If the CAS failed and now > obs > prv then 3.62 - // some other thread raced this thread and installed a new value, in which case 3.63 - // we could either (a) retry the entire operation, (b) retry trying to install now 3.64 - // or (c) just return obs. We use (c). No loop is required although in some cases 3.65 - // we might discard a higher "now" value in deference to a slightly lower but freshly 3.66 - // installed obs value. That's entirely benign -- it admits no new orderings compared 3.67 - // to (a) or (b) -- and greatly reduces coherence traffic. 3.68 - // We might also condition (c) on the magnitude of the delta between obs and now. 3.69 - // Avoiding excessive CAS operations to hot RW locations is critical. 3.70 - // See http://blogs.sun.com/dave/entry/cas_and_cache_trivia_invalidate 3.71 - return (prev == obsv) ? 
now : obsv ; 3.72 - } else { 3.73 - return oldgetTimeNanos(); 3.74 - } 3.75 + const hrtime_t now = gethrtime(); 3.76 + const hrtime_t prev = max_hrtime; 3.77 + if (now <= prev) { 3.78 + return prev; // same or retrograde time; 3.79 + } 3.80 + const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev); 3.81 + assert(obsv >= prev, "invariant"); // Monotonicity 3.82 + // If the CAS succeeded then we're done and return "now". 3.83 + // If the CAS failed and the observed value "obsv" is >= now then 3.84 + // we should return "obsv". If the CAS failed and now > obsv > prev then 3.85 + // some other thread raced this thread and installed a new value, in which case 3.86 + // we could either (a) retry the entire operation, (b) retry trying to install now 3.87 + // or (c) just return obsv. We use (c). No loop is required although in some cases 3.88 + // we might discard a higher "now" value in deference to a slightly lower but freshly 3.89 + // installed obsv value. That's entirely benign -- it admits no new orderings compared 3.90 + // to (a) or (b) -- and greatly reduces coherence traffic. 3.91 + // We might also condition (c) on the magnitude of the delta between obsv and now. 3.92 + // Avoiding excessive CAS operations to hot RW locations is critical. 3.93 + // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate 3.94 + return (prev == obsv) ? now : obsv; 3.95 } 3.96 3.97 // Time since start-up in seconds to a fine granularity.
4.1 --- a/src/share/vm/runtime/os.hpp Mon May 05 15:10:43 2014 +0200 4.2 +++ b/src/share/vm/runtime/os.hpp Fri Apr 25 07:40:33 2014 +0200 4.3 @@ -48,6 +48,9 @@ 4.4 #ifdef TARGET_OS_FAMILY_bsd 4.5 # include "jvm_bsd.h" 4.6 # include <setjmp.h> 4.7 +# ifdef __APPLE__ 4.8 +# include <mach/mach_time.h> 4.9 +# endif 4.10 #endif 4.11 4.12 class AgentLibrary;