src/share/vm/runtime/mutex.cpp

changeset 0:f90c822e73f8
child     6876:710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/runtime/mutex.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,1390 @@
     1.4 +
     1.5 +/*
     1.6 + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
     1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8 + *
     1.9 + * This code is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License version 2 only, as
    1.11 + * published by the Free Software Foundation.
    1.12 + *
    1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.16 + * version 2 for more details (a copy is included in the LICENSE file that
    1.17 + * accompanied this code).
    1.18 + *
    1.19 + * You should have received a copy of the GNU General Public License version
    1.20 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.22 + *
    1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.24 + * or visit www.oracle.com if you need additional information or have any
    1.25 + * questions.
    1.26 + *
    1.27 + */
    1.28 +
    1.29 +#include "precompiled.hpp"
    1.30 +#include "runtime/mutex.hpp"
    1.31 +#include "runtime/osThread.hpp"
    1.32 +#include "runtime/thread.inline.hpp"
    1.33 +#include "utilities/events.hpp"
    1.34 +#ifdef TARGET_OS_FAMILY_linux
    1.35 +# include "mutex_linux.inline.hpp"
    1.36 +#endif
    1.37 +#ifdef TARGET_OS_FAMILY_solaris
    1.38 +# include "mutex_solaris.inline.hpp"
    1.39 +#endif
    1.40 +#ifdef TARGET_OS_FAMILY_windows
    1.41 +# include "mutex_windows.inline.hpp"
    1.42 +#endif
    1.43 +#ifdef TARGET_OS_FAMILY_bsd
    1.44 +# include "mutex_bsd.inline.hpp"
    1.45 +#endif
    1.46 +
    1.47 +PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
    1.48 +
    1.49 +// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o
    1.50 +//
    1.51 +// Native Monitor-Mutex locking - theory of operations
    1.52 +//
    1.53 +// * Native Monitors are completely unrelated to Java-level monitors,
    1.54 +//   although the "back-end" slow-path implementations share a common lineage.
    1.55 +//   See objectMonitor:: in synchronizer.cpp.
    1.56 +//   Native Monitors do *not* support nesting or recursion but otherwise
    1.57 +//   they're basically Hoare-flavor monitors.
    1.58 +//
    1.59 +// * A thread acquires ownership of a Monitor/Mutex by CASing the LockByte
    1.60 +//   in the _LockWord from zero to non-zero.  Note that the _Owner field
    1.61 +//   is advisory and is used only to verify that the thread calling unlock()
    1.62 +//   is indeed the last thread to have acquired the lock.
    1.63 +//
    1.64 +// * Contending threads "push" themselves onto the front of the contention
    1.65 +//   queue -- called the cxq -- with CAS and then spin/park.
    1.66 +//   The _LockWord contains the LockByte as well as the pointer to the head
    1.67 +//   of the cxq.  Colocating the LockByte with the cxq precludes certain races.
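// For illustration only -- a minimal standalone sketch of the CAS "push" idiom,
// written against C++11 std::atomic rather than the Atomic::cmpxchg_ptr used in
// this file (Node and head are hypothetical names for this sketch):
//
//   #include <atomic>
//   struct Node { Node * next ; } ;
//   std::atomic<Node *> head { nullptr } ;
//
//   void push (Node * n) {
//     Node * h = head.load() ;
//     do {
//       n->next = h ;                                  // anticipate success
//     } while (!head.compare_exchange_weak (h, n)) ;   // h is refreshed on failure; retry
//   }
//
// The real cxq differs in that the list head shares _LockWord with the LockByte,
// so the push CAS must preserve the low-order lock bit -- see AcquireOrPush() below.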
    1.68 +//
    1.69 +// * Using a separately addressable LockByte allows for CAS:MEMBAR or CAS:0
    1.70 +//   idioms.  We currently use MEMBAR in the uncontended unlock() path, as
    1.71 +//   MEMBAR often has less latency than CAS.  If warranted, we could switch to
    1.72 +//   a CAS:0 mode, using timers to close the resultant race, as is done
    1.73 +//   with Java Monitors in synchronizer.cpp.
    1.74 +//
    1.75 +//   See the following for a discussion of the relative cost of atomics (CAS)
    1.76 +//   MEMBAR, and ways to eliminate such instructions from the common-case paths:
    1.77 +//   -- http://blogs.sun.com/dave/entry/biased_locking_in_hotspot
    1.78 +//   -- http://blogs.sun.com/dave/resource/MustangSync.pdf
    1.79 +//   -- http://blogs.sun.com/dave/resource/synchronization-public2.pdf
    1.80 +//   -- synchronizer.cpp
    1.81 +//
    1.82 +// * Overall goals - desiderata
    1.83 +//   1. Minimize context switching
    1.84 +//   2. Minimize lock migration
    1.85 +//   3. Minimize CPI -- affinity and locality
    1.86 +//   4. Minimize the execution of high-latency instructions such as CAS or MEMBAR
    1.87 +//   5. Minimize outer lock hold times
    1.88 +//   6. Behave gracefully on a loaded system
    1.89 +//
    1.90 +// * Thread flow and list residency:
    1.91 +//
    1.92 +//   Contention queue --> EntryList --> OnDeck --> Owner --> !Owner
    1.93 +//   [..resident on monitor list..]
    1.94 +//   [...........contending..................]
    1.95 +//
    1.96 +//   -- The contention queue (cxq) contains recently-arrived threads (RATs).
    1.97 +//      Threads on the cxq eventually drain into the EntryList.
    1.98 +//   -- Invariant: a thread appears on at most one list -- cxq, EntryList
    1.99 +//      or WaitSet -- at any one time.
   1.100 +//   -- For a given monitor there can be at most one "OnDeck" thread at any
    1.101 +//      given time but if need be this particular invariant could be relaxed.
   1.102 +//
   1.103 +// * The WaitSet and EntryList linked lists are composed of ParkEvents.
   1.104 +//   I use ParkEvent instead of threads as ParkEvents are immortal and
   1.105 +//   type-stable, meaning we can safely unpark() a possibly stale
   1.106 +//   list element in the unlock()-path.  (That's benign).
   1.107 +//
   1.108 +// * Succession policy - providing for progress:
   1.109 +//
   1.110 +//   As necessary, the unlock()ing thread identifies, unlinks, and unparks
   1.111 +//   an "heir presumptive" tentative successor thread from the EntryList.
   1.112 +//   This becomes the so-called "OnDeck" thread, of which there can be only
   1.113 +//   one at any given time for a given monitor.  The wakee will recontend
   1.114 +//   for ownership of monitor.
   1.115 +//
   1.116 +//   Succession is provided for by a policy of competitive handoff.
   1.117 +//   The exiting thread does _not_ grant or pass ownership to the
    1.118 +//   successor thread.  (This is also referred to as "handoff succession").
   1.119 +//   Instead the exiting thread releases ownership and possibly wakes
   1.120 +//   a successor, so the successor can (re)compete for ownership of the lock.
   1.121 +//
   1.122 +//   Competitive handoff provides excellent overall throughput at the expense
   1.123 +//   of short-term fairness.  If fairness is a concern then one remedy might
   1.124 +//   be to add an AcquireCounter field to the monitor.  After a thread acquires
   1.125 +//   the lock it will decrement the AcquireCounter field.  When the count
   1.126 +//   reaches 0 the thread would reset the AcquireCounter variable, abdicate
   1.127 +//   the lock directly to some thread on the EntryList, and then move itself to the
   1.128 +//   tail of the EntryList.
   1.129 +//
   1.130 +//   But in practice most threads engage or otherwise participate in resource
   1.131 +//   bounded producer-consumer relationships, so lock domination is not usually
   1.132 +//   a practical concern.  Recall too, that in general it's easier to construct
   1.133 +//   a fair lock from a fast lock, but not vice-versa.
   1.134 +//
   1.135 +// * The cxq can have multiple concurrent "pushers" but only one concurrent
    1.136 +//   detaching thread.  This mechanism is immune to ABA corruption.
   1.137 +//   More precisely, the CAS-based "push" onto cxq is ABA-oblivious.
   1.138 +//   We use OnDeck as a pseudo-lock to enforce the at-most-one detaching
   1.139 +//   thread constraint.
   1.140 +//
   1.141 +// * Taken together, the cxq and the EntryList constitute or form a
   1.142 +//   single logical queue of threads stalled trying to acquire the lock.
   1.143 +//   We use two distinct lists to reduce heat on the list ends.
   1.144 +//   Threads in lock() enqueue onto cxq while threads in unlock() will
    1.145 +//   dequeue from the EntryList.  (cf. Michael Scott's "2Q" algorithm).
   1.146 +//   A key desideratum is to minimize queue & monitor metadata manipulation
   1.147 +//   that occurs while holding the "outer" monitor lock -- that is, we want to
    1.148 +//   minimize monitor lock hold times.
   1.149 +//
   1.150 +//   The EntryList is ordered by the prevailing queue discipline and
   1.151 +//   can be organized in any convenient fashion, such as a doubly-linked list or
   1.152 +//   a circular doubly-linked list.  If we need a priority queue then something akin
   1.153 +//   to Solaris' sleepq would work nicely.  Viz.,
   1.154 +//   -- http://agg.eng/ws/on10_nightly/source/usr/src/uts/common/os/sleepq.c.
   1.155 +//   -- http://cvs.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/os/sleepq.c
   1.156 +//   Queue discipline is enforced at ::unlock() time, when the unlocking thread
   1.157 +//   drains the cxq into the EntryList, and orders or reorders the threads on the
   1.158 +//   EntryList accordingly.
   1.159 +//
   1.160 +//   Barring "lock barging", this mechanism provides fair cyclic ordering,
   1.161 +//   somewhat similar to an elevator-scan.
   1.162 +//
   1.163 +// * OnDeck
   1.164 +//   --  For a given monitor there can be at most one OnDeck thread at any given
   1.165 +//       instant.  The OnDeck thread is contending for the lock, but has been
   1.166 +//       unlinked from the EntryList and cxq by some previous unlock() operations.
   1.167 +//       Once a thread has been designated the OnDeck thread it will remain so
   1.168 +//       until it manages to acquire the lock -- being OnDeck is a stable property.
   1.169 +//   --  Threads on the EntryList or cxq are _not allowed to attempt lock acquisition.
   1.170 +//   --  OnDeck also serves as an "inner lock" as follows.  Threads in unlock() will, after
   1.171 +//       having cleared the LockByte and dropped the outer lock,  attempt to "trylock"
   1.172 +//       OnDeck by CASing the field from null to non-null.  If successful, that thread
   1.173 +//       is then responsible for progress and succession and can use CAS to detach and
   1.174 +//       drain the cxq into the EntryList.  By convention, only this thread, the holder of
   1.175 +//       the OnDeck inner lock, can manipulate the EntryList or detach and drain the
   1.176 +//       RATs on the cxq into the EntryList.  This avoids ABA corruption on the cxq as
   1.177 +//       we allow multiple concurrent "push" operations but restrict detach concurrency
   1.178 +//       to at most one thread.  Having selected and detached a successor, the thread then
   1.179 +//       changes the OnDeck to refer to that successor, and then unparks the successor.
   1.180 +//       That successor will eventually acquire the lock and clear OnDeck.  Beware
   1.181 +//       that the OnDeck usage as a lock is asymmetric.  A thread in unlock() transiently
   1.182 +//       "acquires" OnDeck, performs queue manipulations, passes OnDeck to some successor,
   1.183 +//       and then the successor eventually "drops" OnDeck.  Note that there's never
   1.184 +//       any sense of contention on the inner lock, however.  Threads never contend
   1.185 +//       or wait for the inner lock.
    1.186 +//   --  OnDeck provides for futile wakeup throttling as described in section 3.3 of
    1.187 +//       http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
   1.188 +//       In a sense, OnDeck subsumes the ObjectMonitor _Succ and ObjectWaiter
   1.189 +//       TState fields found in Java-level objectMonitors.  (See synchronizer.cpp).
   1.190 +//
   1.191 +// * Waiting threads reside on the WaitSet list -- wait() puts
   1.192 +//   the caller onto the WaitSet.  Notify() or notifyAll() simply
   1.193 +//   transfers threads from the WaitSet to either the EntryList or cxq.
   1.194 +//   Subsequent unlock() operations will eventually unpark the notifyee.
    1.195 +//   Unparking a notifyee in notify() proper is inefficient - if we were to do so
   1.196 +//   it's likely the notifyee would simply impale itself on the lock held
   1.197 +//   by the notifier.
   1.198 +//
   1.199 +// * The mechanism is obstruction-free in that if the holder of the transient
   1.200 +//   OnDeck lock in unlock() is preempted or otherwise stalls, other threads
   1.201 +//   can still acquire and release the outer lock and continue to make progress.
   1.202 +//   At worst, waking of already blocked contending threads may be delayed,
   1.203 +//   but nothing worse.  (We only use "trylock" operations on the inner OnDeck
   1.204 +//   lock).
   1.205 +//
   1.206 +// * Note that thread-local storage must be initialized before a thread
   1.207 +//   uses Native monitors or mutexes.  The native monitor-mutex subsystem
   1.208 +//   depends on Thread::current().
   1.209 +//
   1.210 +// * The monitor synchronization subsystem avoids the use of native
   1.211 +//   synchronization primitives except for the narrow platform-specific
   1.212 +//   park-unpark abstraction.  See the comments in os_solaris.cpp regarding
   1.213 +//   the semantics of park-unpark.  Put another way, this monitor implementation
   1.214 +//   depends only on atomic operations and park-unpark.  The monitor subsystem
   1.215 +//   manages all RUNNING->BLOCKED and BLOCKED->READY transitions while the
   1.216 +//   underlying OS manages the READY<->RUN transitions.
   1.217 +//
    1.218 +// * The memory consistency model provided by lock()-unlock() is at least as
    1.219 +//   strong as -- or stronger than -- the Java Memory Model defined by JSR-133.
   1.220 +//   That is, we guarantee at least entry consistency, if not stronger.
   1.221 +//   See http://g.oswego.edu/dl/jmm/cookbook.html.
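// As an illustrative sketch of entry consistency (SomeMonitor_lock is a
// hypothetical Monitor* used only for this example): data written while holding
// the lock is visible to the next thread that subsequently acquires the same lock.
//
//   int shared ;                        // protected by SomeMonitor_lock
//
//   // Thread A                         // Thread B -- acquires after A releases
//   SomeMonitor_lock->lock() ;          SomeMonitor_lock->lock() ;
//   shared = 42 ;                       int v = shared ;   // guaranteed to observe 42
//   SomeMonitor_lock->unlock() ;        SomeMonitor_lock->unlock() ;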
   1.222 +//
   1.223 +// * Thread:: currently contains a set of purpose-specific ParkEvents:
   1.224 +//   _MutexEvent, _ParkEvent, etc.  A better approach might be to do away with
   1.225 +//   the purpose-specific ParkEvents and instead implement a general per-thread
   1.226 +//   stack of available ParkEvents which we could provision on-demand.  The
   1.227 +//   stack acts as a local cache to avoid excessive calls to ParkEvent::Allocate()
   1.228 +//   and ::Release().  A thread would simply pop an element from the local stack before it
   1.229 +//   enqueued or park()ed.  When the contention was over the thread would
   1.230 +//   push the no-longer-needed ParkEvent back onto its stack.
   1.231 +//
    1.232 +// * A slightly reduced form of ILock() and IUnlock() has been partially
   1.233 +//   model-checked (Murphi) for safety and progress at T=1,2,3 and 4.
   1.234 +//   It'd be interesting to see if TLA/TLC could be useful as well.
   1.235 +//
   1.236 +// * Mutex-Monitor is a low-level "leaf" subsystem.  That is, the monitor
   1.237 +//   code should never call other code in the JVM that might itself need to
   1.238 +//   acquire monitors or mutexes.  That's true *except* in the case of the
   1.239 +//   ThreadBlockInVM state transition wrappers.  The ThreadBlockInVM DTOR handles
   1.240 +//   mutator reentry (ingress) by checking for a pending safepoint in which case it will
   1.241 +//   call SafepointSynchronize::block(), which in turn may call Safepoint_lock->lock(), etc.
   1.242 +//   In that particular case a call to lock() for a given Monitor can end up recursively
   1.243 +//   calling lock() on another monitor.   While distasteful, this is largely benign
    1.244 +//   as the calls come from the jacket that wraps lock(), and not from deep within lock() itself.
   1.245 +//
   1.246 +//   It's unfortunate that native mutexes and thread state transitions were convolved.
   1.247 +//   They're really separate concerns and should have remained that way.  Melding
   1.248 +//   them together was facile -- a bit too facile.   The current implementation badly
   1.249 +//   conflates the two concerns.
   1.250 +//
   1.251 +// * TODO-FIXME:
   1.252 +//
   1.253 +//   -- Add DTRACE probes for contended acquire, contended acquired, contended unlock
   1.254 +//      We should also add DTRACE probes in the ParkEvent subsystem for
   1.255 +//      Park-entry, Park-exit, and Unpark.
   1.256 +//
   1.257 +//   -- We have an excess of mutex-like constructs in the JVM, namely:
   1.258 +//      1. objectMonitors for Java-level synchronization (synchronizer.cpp)
   1.259 +//      2. low-level muxAcquire and muxRelease
   1.260 +//      3. low-level spinAcquire and spinRelease
   1.261 +//      4. native Mutex:: and Monitor::
   1.262 +//      5. jvm_raw_lock() and _unlock()
   1.263 +//      6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
   1.264 +//         similar name.
   1.265 +//
   1.266 +// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o
   1.267 +
   1.268 +
   1.269 +// CASPTR() uses the canonical argument order that dominates in the literature.
   1.270 +// Our internal cmpxchg_ptr() uses a bastardized ordering to accommodate Sun .il templates.
   1.271 +
   1.272 +#define CASPTR(a,c,s) intptr_t(Atomic::cmpxchg_ptr ((void *)(s),(void *)(a),(void *)(c)))
   1.273 +#define UNS(x) (uintptr_t(x))
   1.274 +#define TRACE(m) { static volatile int ctr = 0 ; int x = ++ctr ; if ((x & (x-1))==0) { ::printf ("%d:%s\n", x, #m); ::fflush(stdout); }}
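// Usage note (illustrative): CASPTR(addr, cmp, set) atomically installs "set" at
// "addr" iff the word at "addr" still equals "cmp", and returns the value that the
// CAS observed at "addr" -- equal to "cmp" exactly when the exchange succeeded.
// For example, TryLock() below issues CASPTR (&_LockWord, v, v|_LBIT) and treats
// a return value of v as successful acquisition.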
   1.275 +
   1.276 +// Simplistic low-quality Marsaglia SHIFT-XOR RNG.
   1.277 +// Bijective except for the trailing mask operation.
   1.278 +// Useful for spin loops as the compiler can't optimize it away.
   1.279 +
   1.280 +static inline jint MarsagliaXORV (jint x) {
   1.281 +  if (x == 0) x = 1|os::random() ;
   1.282 +  x ^= x << 6;
   1.283 +  x ^= ((unsigned)x) >> 21;
   1.284 +  x ^= x << 7 ;
   1.285 +  return x & 0x7FFFFFFF ;
   1.286 +}
   1.287 +
   1.288 +static inline jint MarsagliaXOR (jint * const a) {
   1.289 +  jint x = *a ;
   1.290 +  if (x == 0) x = UNS(a)|1 ;
   1.291 +  x ^= x << 6;
   1.292 +  x ^= ((unsigned)x) >> 21;
   1.293 +  x ^= x << 7 ;
   1.294 +  *a = x ;
   1.295 +  return x & 0x7FFFFFFF ;
   1.296 +}
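// Usage sketch (illustration): a caller burns roughly "Delay" iterations without
// touching shared memory by threading the RNG state through a local variable:
//
//   jint r = seed ;
//   for (int k = Delay ; --k >= 0 ; ) r = MarsagliaXORV (r) ;
//   seed = r ;
//
// TrySpin() below follows essentially this pattern with Self->rng[0] as the seed;
// the data dependence on r keeps the compiler from eliding the delay loop.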
   1.297 +
   1.298 +static int Stall (int its) {
   1.299 +  static volatile jint rv = 1 ;
   1.300 +  volatile int OnFrame = 0 ;
   1.301 +  jint v = rv ^ UNS(OnFrame) ;
   1.302 +  while (--its >= 0) {
   1.303 +    v = MarsagliaXORV (v) ;
   1.304 +  }
   1.305 +  // Make this impossible for the compiler to optimize away,
   1.306 +  // but (mostly) avoid W coherency sharing on MP systems.
   1.307 +  if (v == 0x12345) rv = v ;
   1.308 +  return v ;
   1.309 +}
   1.310 +
   1.311 +int Monitor::TryLock () {
   1.312 +  intptr_t v = _LockWord.FullWord ;
   1.313 +  for (;;) {
   1.314 +    if ((v & _LBIT) != 0) return 0 ;
   1.315 +    const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ;
   1.316 +    if (v == u) return 1 ;
   1.317 +    v = u ;
   1.318 +  }
   1.319 +}
   1.320 +
   1.321 +int Monitor::TryFast () {
   1.322 +  // Optimistic fast-path form ...
   1.323 +  // Fast-path attempt for the common uncontended case.
   1.324 +  // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
   1.325 +  intptr_t v = CASPTR (&_LockWord, 0, _LBIT) ;  // agro ...
   1.326 +  if (v == 0) return 1 ;
   1.327 +
   1.328 +  for (;;) {
   1.329 +    if ((v & _LBIT) != 0) return 0 ;
   1.330 +    const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ;
   1.331 +    if (v == u) return 1 ;
   1.332 +    v = u ;
   1.333 +  }
   1.334 +}
   1.335 +
   1.336 +int Monitor::ILocked () {
   1.337 +  const intptr_t w = _LockWord.FullWord & 0xFF ;
   1.338 +  assert (w == 0 || w == _LBIT, "invariant") ;
   1.339 +  return w == _LBIT ;
   1.340 +}
   1.341 +
   1.342 +// Polite TATAS spinlock with exponential backoff - bounded spin.
   1.343 +// Ideally we'd use processor cycles, time or vtime to control
   1.344 +// the loop, but we currently use iterations.
    1.346 +// All the constants within were derived empirically but work well
    1.347 +// over the spectrum of J2SE reference platforms.
   1.347 +// On Niagara-class systems the back-off is unnecessary but
   1.348 +// is relatively harmless.  (At worst it'll slightly retard
   1.349 +// acquisition times).  The back-off is critical for older SMP systems
   1.350 +// where constant fetching of the LockWord would otherwise impair
   1.351 +// scalability.
   1.352 +//
   1.353 +// Clamp spinning at approximately 1/2 of a context-switch round-trip.
   1.354 +// See synchronizer.cpp for details and rationale.
   1.355 +
   1.356 +int Monitor::TrySpin (Thread * const Self) {
   1.357 +  if (TryLock())    return 1 ;
   1.358 +  if (!os::is_MP()) return 0 ;
   1.359 +
   1.360 +  int Probes  = 0 ;
   1.361 +  int Delay   = 0 ;
   1.362 +  int Steps   = 0 ;
   1.363 +  int SpinMax = NativeMonitorSpinLimit ;
   1.364 +  int flgs    = NativeMonitorFlags ;
   1.365 +  for (;;) {
   1.366 +    intptr_t v = _LockWord.FullWord;
   1.367 +    if ((v & _LBIT) == 0) {
   1.368 +      if (CASPTR (&_LockWord, v, v|_LBIT) == v) {
   1.369 +        return 1 ;
   1.370 +      }
   1.371 +      continue ;
   1.372 +    }
   1.373 +
   1.374 +    if ((flgs & 8) == 0) {
   1.375 +      SpinPause () ;
   1.376 +    }
   1.377 +
   1.378 +    // Periodically increase Delay -- variable Delay form
   1.379 +    // conceptually: delay *= 1 + 1/Exponent
   1.380 +    ++ Probes;
   1.381 +    if (Probes > SpinMax) return 0 ;
   1.382 +
   1.383 +    if ((Probes & 0x7) == 0) {
   1.384 +      Delay = ((Delay << 1)|1) & 0x7FF ;
   1.385 +      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ;
   1.386 +    }
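    // (Illustrative note: Delay thus steps through 1, 3, 7, 15, ... and saturates
    //  at 0x7FF == 2047 iterations -- the bounded exponential backoff noted above.)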
   1.387 +
   1.388 +    if (flgs & 2) continue ;
   1.389 +
   1.390 +    // Consider checking _owner's schedctl state, if OFFPROC abort spin.
    1.391 +    // If the owner is OFFPROC then it's unlikely that the lock will be dropped
   1.392 +    // in a timely fashion, which suggests that spinning would not be fruitful
   1.393 +    // or profitable.
   1.394 +
   1.395 +    // Stall for "Delay" time units - iterations in the current implementation.
   1.396 +    // Avoid generating coherency traffic while stalled.
   1.397 +    // Possible ways to delay:
   1.398 +    //   PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt,
   1.399 +    //   wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ...
   1.400 +    // Note that on Niagara-class systems we want to minimize STs in the
   1.401 +    // spin loop.  N1 and brethren write-around the L1$ over the xbar into the L2$.
   1.402 +    // Furthermore, they don't have a W$ like traditional SPARC processors.
   1.403 +    // We currently use a Marsaglia Shift-Xor RNG loop.
   1.404 +    Steps += Delay ;
   1.405 +    if (Self != NULL) {
   1.406 +      jint rv = Self->rng[0] ;
   1.407 +      for (int k = Delay ; --k >= 0; ) {
   1.408 +        rv = MarsagliaXORV (rv) ;
   1.409 +        if ((flgs & 4) == 0 && SafepointSynchronize::do_call_back()) return 0 ;
   1.410 +      }
   1.411 +      Self->rng[0] = rv ;
   1.412 +    } else {
   1.413 +      Stall (Delay) ;
   1.414 +    }
   1.415 +  }
   1.416 +}
   1.417 +
   1.418 +static int ParkCommon (ParkEvent * ev, jlong timo) {
   1.419 +  // Diagnostic support - periodically unwedge blocked threads
   1.420 +  intx nmt = NativeMonitorTimeout ;
   1.421 +  if (nmt > 0 && (nmt < timo || timo <= 0)) {
   1.422 +     timo = nmt ;
   1.423 +  }
   1.424 +  int err = OS_OK ;
   1.425 +  if (0 == timo) {
   1.426 +    ev->park() ;
   1.427 +  } else {
   1.428 +    err = ev->park(timo) ;
   1.429 +  }
   1.430 +  return err ;
   1.431 +}
   1.432 +
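// Note (descriptive): AcquireOrPush() either acquires the outer lock -- returning 1 --
// or pushes ESelf onto the front of the cxq and returns 0, in which case the caller
// must park and wait to be granted the OnDeck position before recontending.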
   1.433 +inline int Monitor::AcquireOrPush (ParkEvent * ESelf) {
   1.434 +  intptr_t v = _LockWord.FullWord ;
   1.435 +  for (;;) {
   1.436 +    if ((v & _LBIT) == 0) {
   1.437 +      const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ;
   1.438 +      if (u == v) return 1 ;        // indicate acquired
   1.439 +      v = u ;
   1.440 +    } else {
   1.441 +      // Anticipate success ...
   1.442 +      ESelf->ListNext = (ParkEvent *) (v & ~_LBIT) ;
   1.443 +      const intptr_t u = CASPTR (&_LockWord, v, intptr_t(ESelf)|_LBIT) ;
   1.444 +      if (u == v) return 0 ;        // indicate pushed onto cxq
   1.445 +      v = u ;
   1.446 +    }
   1.447 +    // Interference - LockWord change - just retry
   1.448 +  }
   1.449 +}
   1.450 +
   1.451 +// ILock and IWait are the lowest level primitive internal blocking
   1.452 +// synchronization functions.  The callers of IWait and ILock must have
   1.453 +// performed any needed state transitions beforehand.
   1.454 +// IWait and ILock may directly call park() without any concern for thread state.
   1.455 +// Note that ILock and IWait do *not* access _owner.
   1.456 +// _owner is a higher-level logical concept.
   1.457 +
   1.458 +void Monitor::ILock (Thread * Self) {
   1.459 +  assert (_OnDeck != Self->_MutexEvent, "invariant") ;
   1.460 +
   1.461 +  if (TryFast()) {
   1.462 + Exeunt:
   1.463 +    assert (ILocked(), "invariant") ;
   1.464 +    return ;
   1.465 +  }
   1.466 +
   1.467 +  ParkEvent * const ESelf = Self->_MutexEvent ;
   1.468 +  assert (_OnDeck != ESelf, "invariant") ;
   1.469 +
   1.470 +  // As an optimization, spinners could conditionally try to set ONDECK to _LBIT
   1.471 +  // Synchronizer.cpp uses a similar optimization.
   1.472 +  if (TrySpin (Self)) goto Exeunt ;
   1.473 +
   1.474 +  // Slow-path - the lock is contended.
   1.475 +  // Either Enqueue Self on cxq or acquire the outer lock.
   1.476 +  // LockWord encoding = (cxq,LOCKBYTE)
   1.477 +  ESelf->reset() ;
   1.478 +  OrderAccess::fence() ;
   1.479 +
   1.480 +  // Optional optimization ... try barging on the inner lock
    1.481 +  if ((NativeMonitorFlags & 32) && CASPTR (&_OnDeck, NULL, UNS(ESelf)) == 0) {
   1.482 +    goto OnDeck_LOOP ;
   1.483 +  }
   1.484 +
   1.485 +  if (AcquireOrPush (ESelf)) goto Exeunt ;
   1.486 +
   1.487 +  // At any given time there is at most one ondeck thread.
   1.488 +  // ondeck implies not resident on cxq and not resident on EntryList
   1.489 +  // Only the OnDeck thread can try to acquire -- contended for -- the lock.
   1.490 +  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
   1.491 +  // Deschedule Self so that others may run.
   1.492 +  while (_OnDeck != ESelf) {
   1.493 +    ParkCommon (ESelf, 0) ;
   1.494 +  }
   1.495 +
   1.496 +  // Self is now in the ONDECK position and will remain so until it
   1.497 +  // manages to acquire the lock.
   1.498 + OnDeck_LOOP:
   1.499 +  for (;;) {
   1.500 +    assert (_OnDeck == ESelf, "invariant") ;
   1.501 +    if (TrySpin (Self)) break ;
   1.502 +    // CONSIDER: if ESelf->TryPark() && TryLock() break ...
   1.503 +    // It's probably wise to spin only if we *actually* blocked
   1.504 +    // CONSIDER: check the lockbyte, if it remains set then
   1.505 +    // preemptively drain the cxq into the EntryList.
   1.506 +    // The best place and time to perform queue operations -- lock metadata --
   1.507 +    // is _before having acquired the outer lock, while waiting for the lock to drop.
   1.508 +    ParkCommon (ESelf, 0) ;
   1.509 +  }
   1.510 +
   1.511 +  assert (_OnDeck == ESelf, "invariant") ;
   1.512 +  _OnDeck = NULL ;
   1.513 +
    1.514 +  // Note that we currently drop the inner lock (clear OnDeck) in the slow-path
   1.515 +  // epilog immediately after having acquired the outer lock.
   1.516 +  // But instead we could consider the following optimizations:
   1.517 +  // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation.
    1.518 +  //    This might avoid potential reacquisition of the inner lock in IUnlock().
   1.519 +  // B. While still holding the inner lock, attempt to opportunistically select
   1.520 +  //    and unlink the next ONDECK thread from the EntryList.
   1.521 +  //    If successful, set ONDECK to refer to that thread, otherwise clear ONDECK.
   1.522 +  //    It's critical that the select-and-unlink operation run in constant-time as
   1.523 +  //    it executes when holding the outer lock and may artificially increase the
   1.524 +  //    effective length of the critical section.
   1.525 +  // Note that (A) and (B) are tantamount to succession by direct handoff for
   1.526 +  // the inner lock.
   1.527 +  goto Exeunt ;
   1.528 +}
   1.529 +
   1.530 +void Monitor::IUnlock (bool RelaxAssert) {
   1.531 +  assert (ILocked(), "invariant") ;
   1.532 +  // Conceptually we need a MEMBAR #storestore|#loadstore barrier or fence immediately
   1.533 +  // before the store that releases the lock.  Crucially, all the stores and loads in the
   1.534 +  // critical section must be globally visible before the store of 0 into the lock-word
   1.535 +  // that releases the lock becomes globally visible.  That is, memory accesses in the
   1.536 +  // critical section should not be allowed to bypass or overtake the following ST that
   1.537 +  // releases the lock.  As such, to prevent accesses within the critical section
   1.538 +  // from "leaking" out, we need a release fence between the critical section and the
   1.539 +  // store that releases the lock.  In practice that release barrier is elided on
   1.540 +  // platforms with strong memory models such as TSO.
   1.541 +  //
    1.542 +  // Note that the OrderAccess::storeload() fence that appears after the unlock store
   1.543 +  // provides for progress conditions and succession and is _not related to exclusion
   1.544 +  // safety or lock release consistency.
   1.545 +  OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], 0); // drop outer lock
   1.546 +
   1.547 +  OrderAccess::storeload ();
   1.548 +  ParkEvent * const w = _OnDeck ;
   1.549 +  assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ;
   1.550 +  if (w != NULL) {
   1.551 +    // Either we have a valid ondeck thread or ondeck is transiently "locked"
   1.552 +    // by some exiting thread as it arranges for succession.  The LSBit of
   1.553 +    // OnDeck allows us to discriminate two cases.  If the latter, the
   1.554 +    // responsibility for progress and succession lies with that other thread.
   1.555 +    // For good performance, we also depend on the fact that redundant unpark()
   1.556 +    // operations are cheap.  That is, repeated Unpark()ing of the ONDECK thread
   1.557 +    // is inexpensive.  This approach provides implicit futile wakeup throttling.
   1.558 +    // Note that the referent "w" might be stale with respect to the lock.
   1.559 +    // In that case the following unpark() is harmless and the worst that'll happen
   1.560 +    // is a spurious return from a park() operation.  Critically, if "w" _is stale,
   1.561 +    // then progress is known to have occurred as that means the thread associated
   1.562 +    // with "w" acquired the lock.  In that case this thread need take no further
   1.563 +    // action to guarantee progress.
   1.564 +    if ((UNS(w) & _LBIT) == 0) w->unpark() ;
   1.565 +    return ;
   1.566 +  }
   1.567 +
   1.568 +  intptr_t cxq = _LockWord.FullWord ;
   1.569 +  if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) {
   1.570 +    return ;      // normal fast-path exit - cxq and EntryList both empty
   1.571 +  }
   1.572 +  if (cxq & _LBIT) {
   1.573 +    // Optional optimization ...
   1.574 +    // Some other thread acquired the lock in the window since this
   1.575 +    // thread released it.  Succession is now that thread's responsibility.
   1.576 +    return ;
   1.577 +  }
   1.578 +
   1.579 + Succession:
   1.580 +  // Slow-path exit - this thread must ensure succession and progress.
   1.581 +  // OnDeck serves as lock to protect cxq and EntryList.
   1.582 +  // Only the holder of OnDeck can manipulate EntryList or detach the RATs from cxq.
   1.583 +  // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS)
   1.584 +  // but only one concurrent consumer (detacher of RATs).
   1.585 +  // Consider protecting this critical section with schedctl on Solaris.
   1.586 +  // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
   1.587 +  // picks a successor and marks that thread as OnDeck.  That successor
   1.588 +  // thread will then clear OnDeck once it eventually acquires the outer lock.
   1.589 +  if (CASPTR (&_OnDeck, NULL, _LBIT) != UNS(NULL)) {
   1.590 +    return ;
   1.591 +  }
   1.592 +
   1.593 +  ParkEvent * List = _EntryList ;
   1.594 +  if (List != NULL) {
   1.595 +    // Transfer the head of the EntryList to the OnDeck position.
   1.596 +    // Once OnDeck, a thread stays OnDeck until it acquires the lock.
    1.597 +    // For a given lock there is at most one OnDeck thread at any one instant.
   1.598 +   WakeOne:
   1.599 +    assert (List == _EntryList, "invariant") ;
   1.600 +    ParkEvent * const w = List ;
   1.601 +    assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ;
   1.602 +    _EntryList = w->ListNext ;
   1.603 +    // as a diagnostic measure consider setting w->_ListNext = BAD
   1.604 +    assert (UNS(_OnDeck) == _LBIT, "invariant") ;
   1.605 +    _OnDeck = w ;           // pass OnDeck to w.
   1.606 +                            // w will clear OnDeck once it acquires the outer lock
   1.607 +
   1.608 +    // Another optional optimization ...
   1.609 +    // For heavily contended locks it's not uncommon that some other
   1.610 +    // thread acquired the lock while this thread was arranging succession.
   1.611 +    // Try to defer the unpark() operation - Delegate the responsibility
    1.612 +    // for unpark()ing the OnDeck thread to the current or subsequent owners.
   1.613 +    // That is, the new owner is responsible for unparking the OnDeck thread.
   1.614 +    OrderAccess::storeload() ;
   1.615 +    cxq = _LockWord.FullWord ;
   1.616 +    if (cxq & _LBIT) return ;
   1.617 +
   1.618 +    w->unpark() ;
   1.619 +    return ;
   1.620 +  }
   1.621 +
   1.622 +  cxq = _LockWord.FullWord ;
   1.623 +  if ((cxq & ~_LBIT) != 0) {
   1.624 +    // The EntryList is empty but the cxq is populated.
   1.625 +    // drain RATs from cxq into EntryList
   1.626 +    // Detach RATs segment with CAS and then merge into EntryList
   1.627 +    for (;;) {
   1.628 +      // optional optimization - if locked, the owner is responsible for succession
   1.629 +      if (cxq & _LBIT) goto Punt ;
   1.630 +      const intptr_t vfy = CASPTR (&_LockWord, cxq, cxq & _LBIT) ;
   1.631 +      if (vfy == cxq) break ;
   1.632 +      cxq = vfy ;
   1.633 +      // Interference - LockWord changed - Just retry
   1.634 +      // We can see concurrent interference from contending threads
   1.635 +      // pushing themselves onto the cxq or from lock-unlock operations.
   1.636 +      // From the perspective of this thread, EntryList is stable and
   1.637 +      // the cxq is prepend-only -- the head is volatile but the interior
   1.638 +      // of the cxq is stable.  In theory if we encounter interference from threads
   1.639 +      // pushing onto cxq we could simply break off the original cxq suffix and
   1.640 +      // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts
   1.641 +      // on the high-traffic LockWord variable.   For instance lets say the cxq is "ABCD"
   1.642 +      // when we first fetch cxq above.  Between the fetch -- where we observed "A"
   1.643 +      // -- and CAS -- where we attempt to CAS null over A -- "PQR" arrive,
    1.644 +      // yielding cxq = "PQRABCD".  In this case we could simply set A.ListNext to
    1.645 +      // null, leaving cxq = "PQRA", and transfer the "BCD" segment to the EntryList.
   1.646 +      // Note too, that it's safe for this thread to traverse the cxq
   1.647 +      // without taking any special concurrency precautions.
   1.648 +    }
   1.649 +
   1.650 +    // We don't currently reorder the cxq segment as we move it onto
   1.651 +    // the EntryList, but it might make sense to reverse the order
   1.652 +    // or perhaps sort by thread priority.  See the comments in
   1.653 +    // synchronizer.cpp objectMonitor::exit().
   1.654 +    assert (_EntryList == NULL, "invariant") ;
   1.655 +    _EntryList = List = (ParkEvent *)(cxq & ~_LBIT) ;
   1.656 +    assert (List != NULL, "invariant") ;
   1.657 +    goto WakeOne ;
   1.658 +  }
   1.659 +
   1.660 +  // cxq|EntryList is empty.
    1.661 +  // w == NULL implies that cxq|EntryList was empty in the past.
   1.662 +  // Possible race - rare inopportune interleaving.
   1.663 +  // A thread could have added itself to cxq since this thread previously checked.
   1.664 +  // Detect and recover by refetching cxq.
   1.665 + Punt:
   1.666 +  assert (UNS(_OnDeck) == _LBIT, "invariant") ;
   1.667 +  _OnDeck = NULL ;            // Release inner lock.
   1.668 +  OrderAccess::storeload();   // Dekker duality - pivot point
   1.669 +
   1.670 +  // Resample LockWord/cxq to recover from possible race.
   1.671 +  // For instance, while this thread T1 held OnDeck, some other thread T2 might
   1.672 +  // acquire the outer lock.  Another thread T3 might try to acquire the outer
   1.673 +  // lock, but encounter contention and enqueue itself on cxq.  T2 then drops the
   1.674 +  // outer lock, but skips succession as this thread T1 still holds OnDeck.
   1.675 +  // T1 is and remains responsible for ensuring succession of T3.
   1.676 +  //
   1.677 +  // Note that we don't need to recheck EntryList, just cxq.
   1.678 +  // If threads moved onto EntryList since we dropped OnDeck
   1.679 +  // that implies some other thread forced succession.
   1.680 +  cxq = _LockWord.FullWord ;
   1.681 +  if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) {
   1.682 +    goto Succession ;         // potential race -- re-run succession
   1.683 +  }
   1.684 +  return ;
   1.685 +}
   1.686 +
   1.687 +bool Monitor::notify() {
   1.688 +  assert (_owner == Thread::current(), "invariant") ;
   1.689 +  assert (ILocked(), "invariant") ;
   1.690 +  if (_WaitSet == NULL) return true ;
   1.691 +  NotifyCount ++ ;
   1.692 +
   1.693 +  // Transfer one thread from the WaitSet to the EntryList or cxq.
   1.694 +  // Currently we just unlink the head of the WaitSet and prepend to the cxq.
   1.695 +  // And of course we could just unlink it and unpark it, too, but
   1.696 +  // in that case it'd likely impale itself on the reentry.
   1.697 +  Thread::muxAcquire (_WaitLock, "notify:WaitLock") ;
   1.698 +  ParkEvent * nfy = _WaitSet ;
   1.699 +  if (nfy != NULL) {                  // DCL idiom
   1.700 +    _WaitSet = nfy->ListNext ;
   1.701 +    assert (nfy->Notified == 0, "invariant") ;
   1.702 +    // push nfy onto the cxq
   1.703 +    for (;;) {
   1.704 +      const intptr_t v = _LockWord.FullWord ;
   1.705 +      assert ((v & 0xFF) == _LBIT, "invariant") ;
   1.706 +      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
   1.707 +      if (CASPTR (&_LockWord, v, UNS(nfy)|_LBIT) == v) break;
   1.708 +      // interference - _LockWord changed -- just retry
   1.709 +    }
   1.710 +    // Note that setting Notified before pushing nfy onto the cxq is
   1.711 +    // also legal and safe, but the safety properties are much more
   1.712 +    // subtle, so for the sake of code stewardship ...
   1.713 +    OrderAccess::fence() ;
   1.714 +    nfy->Notified = 1;
   1.715 +  }
   1.716 +  Thread::muxRelease (_WaitLock) ;
   1.717 +  if (nfy != NULL && (NativeMonitorFlags & 16)) {
   1.718 +    // Experimental code ... light up the wakee in the hope that this thread (the owner)
   1.719 +    // will drop the lock just about the time the wakee comes ONPROC.
   1.720 +    nfy->unpark() ;
   1.721 +  }
   1.722 +  assert (ILocked(), "invariant") ;
   1.723 +  return true ;
   1.724 +}
   1.725 +
   1.726 +// Currently notifyAll() transfers the waiters one-at-a-time from the waitset
    1.727 +// to the cxq.  This could be done more efficiently with a single bulk en-masse transfer,
    1.728 +// but in practice notifyAll() for large #s of threads is rare and not time-critical.
    1.729 +// Beware too, that we invert the order of the waiters.  Let's say that the
   1.730 +// waitset is "ABCD" and the cxq is "XYZ".  After a notifyAll() the waitset
   1.731 +// will be empty and the cxq will be "DCBAXYZ".  This is benign, of course.
   1.732 +
   1.733 +bool Monitor::notify_all() {
   1.734 +  assert (_owner == Thread::current(), "invariant") ;
   1.735 +  assert (ILocked(), "invariant") ;
   1.736 +  while (_WaitSet != NULL) notify() ;
   1.737 +  return true ;
   1.738 +}
   1.739 +
   1.740 +int Monitor::IWait (Thread * Self, jlong timo) {
   1.741 +  assert (ILocked(), "invariant") ;
   1.742 +
   1.743 +  // Phases:
   1.744 +  // 1. Enqueue Self on WaitSet - currently prepend
   1.745 +  // 2. unlock - drop the outer lock
   1.746 +  // 3. wait for either notification or timeout
   1.747 +  // 4. lock - reentry - reacquire the outer lock
   1.748 +
   1.749 +  ParkEvent * const ESelf = Self->_MutexEvent ;
   1.750 +  ESelf->Notified = 0 ;
   1.751 +  ESelf->reset() ;
   1.752 +  OrderAccess::fence() ;
   1.753 +
   1.754 +  // Add Self to WaitSet
   1.755 +  // Ideally only the holder of the outer lock would manipulate the WaitSet -
   1.756 +  // That is, the outer lock would implicitly protect the WaitSet.
   1.757 +  // But if a thread in wait() encounters a timeout it will need to dequeue itself
   1.758 +  // from the WaitSet _before it becomes the owner of the lock.  We need to dequeue
   1.759 +  // as the ParkEvent -- which serves as a proxy for the thread -- can't reside
    1.760 +  // on both the WaitSet and the EntryList|cxq at the same time.  That is, a thread
   1.761 +  // on the WaitSet can't be allowed to compete for the lock until it has managed to
   1.762 +  // unlink its ParkEvent from WaitSet.  Thus the need for WaitLock.
   1.763 +  // Contention on the WaitLock is minimal.
   1.764 +  //
    1.765 +  // Another viable approach would be to add another ParkEvent, "WaitEvent", to the
   1.766 +  // thread class.  The WaitSet would be composed of WaitEvents.  Only the
   1.767 +  // owner of the outer lock would manipulate the WaitSet.  A thread in wait()
   1.768 +  // could then compete for the outer lock, and then, if necessary, unlink itself
   1.769 +  // from the WaitSet only after having acquired the outer lock.  More precisely,
    1.770 +  // there would be no WaitLock.  A thread in wait() would enqueue its WaitEvent
   1.771 +  // on the WaitSet; release the outer lock; wait for either notification or timeout;
    1.772 +  // reacquire the outer lock; and then, if needed, unlink itself from the WaitSet.
   1.773 +  //
   1.774 +  // Alternatively, a 2nd set of list link fields in the ParkEvent might suffice.
   1.775 +  // One set would be for the WaitSet and one for the EntryList.
   1.776 +  // We could also deconstruct the ParkEvent into a "pure" event and add a
   1.777 +  // new immortal/TSM "ListElement" class that referred to ParkEvents.
   1.778 +  // In that case we could have one ListElement on the WaitSet and another
   1.779 +  // on the EntryList, with both referring to the same pure Event.
   1.780 +
   1.781 +  Thread::muxAcquire (_WaitLock, "wait:WaitLock:Add") ;
   1.782 +  ESelf->ListNext = _WaitSet ;
   1.783 +  _WaitSet = ESelf ;
   1.784 +  Thread::muxRelease (_WaitLock) ;
   1.785 +
   1.786 +  // Release the outer lock
   1.787 +  // We call IUnlock (RelaxAssert=true) as a thread T1 might
   1.788 +  // enqueue itself on the WaitSet, call IUnlock(), drop the lock,
   1.789 +  // and then stall before it can attempt to wake a successor.
   1.790 +  // Some other thread T2 acquires the lock, and calls notify(), moving
   1.791 +  // T1 from the WaitSet to the cxq.  T2 then drops the lock.  T1 resumes,
   1.792 +  // and then finds *itself* on the cxq.  During the course of a normal
   1.793 +  // IUnlock() call a thread should _never find itself on the EntryList
   1.794 +  // or cxq, but in the case of wait() it's possible.
   1.795 +  // See synchronizer.cpp objectMonitor::wait().
   1.796 +  IUnlock (true) ;
   1.797 +
   1.798 +  // Wait for either notification or timeout
   1.799 +  // Beware that in some circumstances we might propagate
   1.800 +  // spurious wakeups back to the caller.
   1.801 +
   1.802 +  for (;;) {
   1.803 +    if (ESelf->Notified) break ;
   1.804 +    int err = ParkCommon (ESelf, timo) ;
   1.805 +    if (err == OS_TIMEOUT || (NativeMonitorFlags & 1)) break ;
   1.806 +  }
   1.807 +
   1.808 +  // Prepare for reentry - if necessary, remove ESelf from WaitSet
   1.809 +  // ESelf can be:
   1.810 +  // 1. Still on the WaitSet.  This can happen if we exited the loop by timeout.
   1.811 +  // 2. On the cxq or EntryList
   1.812 +  // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position.
   1.813 +
   1.814 +  OrderAccess::fence() ;
   1.815 +  int WasOnWaitSet = 0 ;
   1.816 +  if (ESelf->Notified == 0) {
   1.817 +    Thread::muxAcquire (_WaitLock, "wait:WaitLock:remove") ;
   1.818 +    if (ESelf->Notified == 0) {     // DCL idiom
   1.819 +      assert (_OnDeck != ESelf, "invariant") ;   // can't be both OnDeck and on WaitSet
   1.820 +      // ESelf is resident on the WaitSet -- unlink it.
   1.821 +      // A doubly-linked list would be better here so we can unlink in constant-time.
   1.822 +      // We have to unlink before we potentially recontend as ESelf might otherwise
   1.823 +      // end up on the cxq|EntryList -- it can't be on two lists at once.
   1.824 +      ParkEvent * p = _WaitSet ;
   1.825 +      ParkEvent * q = NULL ;            // classic q chases p
   1.826 +      while (p != NULL && p != ESelf) {
   1.827 +        q = p ;
   1.828 +        p = p->ListNext ;
   1.829 +      }
   1.830 +      assert (p == ESelf, "invariant") ;
   1.831 +      if (p == _WaitSet) {      // found at head
   1.832 +        assert (q == NULL, "invariant") ;
   1.833 +        _WaitSet = p->ListNext ;
   1.834 +      } else {                  // found in interior
   1.835 +        assert (q->ListNext == p, "invariant") ;
   1.836 +        q->ListNext = p->ListNext ;
   1.837 +      }
   1.838 +      WasOnWaitSet = 1 ;        // We were *not* notified but instead encountered timeout
   1.839 +    }
   1.840 +    Thread::muxRelease (_WaitLock) ;
   1.841 +  }
   1.842 +
   1.843 +  // Reentry phase - reacquire the lock
   1.844 +  if (WasOnWaitSet) {
   1.845 +    // ESelf was previously on the WaitSet but we just unlinked it above
   1.846 +    // because of a timeout.  ESelf is not resident on any list and is not OnDeck
   1.847 +    assert (_OnDeck != ESelf, "invariant") ;
   1.848 +    ILock (Self) ;
   1.849 +  } else {
   1.850 +    // A prior notify() operation moved ESelf from the WaitSet to the cxq.
   1.851 +    // ESelf is now on the cxq, EntryList or at the OnDeck position.
   1.852 +    // The following fragment is extracted from Monitor::ILock()
   1.853 +    for (;;) {
   1.854 +      if (_OnDeck == ESelf && TrySpin(Self)) break ;
   1.855 +      ParkCommon (ESelf, 0) ;
   1.856 +    }
   1.857 +    assert (_OnDeck == ESelf, "invariant") ;
   1.858 +    _OnDeck = NULL ;
   1.859 +  }
   1.860 +
   1.861 +  assert (ILocked(), "invariant") ;
   1.862 +  return WasOnWaitSet != 0 ;        // return true IFF timeout
   1.863 +}
   1.864 +
   1.865 +
   1.866 +// ON THE VMTHREAD SNEAKING PAST HELD LOCKS:
   1.867 +// In particular, there are certain types of global lock that may be held
   1.868 +// by a Java thread while it is blocked at a safepoint but before it has
   1.869 +// written the _owner field. These locks may be sneakily acquired by the
   1.870 +// VM thread during a safepoint to avoid deadlocks. Alternatively, one should
   1.871 +// identify all such locks, and ensure that Java threads never block at
   1.872 +// safepoints while holding them (_no_safepoint_check_flag). While it
   1.873 +// seems as though this could increase the time to reach a safepoint
   1.874 +// (or at least increase the mean, if not the variance), the latter
   1.875 +// approach might make for a cleaner, more maintainable JVM design.
   1.876 +//
   1.877 +// Sneaking is vile and reprehensible and should be excised at the 1st
   1.878 +// opportunity.  It's possible that the need for sneaking could be obviated
   1.879 +// as follows.  Currently, a thread might (a) while TBIVM, call pthread_mutex_lock
   1.880 +// or ILock() thus acquiring the "physical" lock underlying Monitor/Mutex.
   1.881 +// (b) stall at the TBIVM exit point as a safepoint is in effect.  Critically,
   1.882 +// it'll stall at the TBIVM reentry state transition after having acquired the
   1.883 +// underlying lock, but before having set _owner and having entered the actual
    1.884 +// critical section.  The lock-sneaking facility leverages that fact and allows the
    1.885 +// VM thread to logically acquire locks that have already been physically locked by mutators
    1.886 +// but whose mutator owners are known to be blocked at the reentry thread state transition.
   1.887 +//
   1.888 +// If we were to modify the Monitor-Mutex so that TBIVM state transitions tightly
   1.889 +// wrapped calls to park(), then we could likely do away with sneaking.  We'd
    1.890 +// decouple lock acquisition and parking.  The critical invariant for eliminating
   1.891 +// sneaking is to ensure that we never "physically" acquire the lock while TBIVM.
   1.892 +// An easy way to accomplish this is to wrap the park calls in a narrow TBIVM jacket.
   1.893 +// One difficulty with this approach is that the TBIVM wrapper could recurse and
   1.894 +// call lock() deep from within a lock() call, while the MutexEvent was already enqueued.
   1.895 +// Using a stack (N=2 at minimum) of ParkEvents would take care of that problem.
   1.896 +//
   1.897 +// But of course the proper ultimate approach is to avoid schemes that require explicit
    1.898 +// sneaking or dependence on any clever invariants or subtle implementation properties
   1.899 +// of Mutex-Monitor and instead directly address the underlying design flaw.
   1.900 +
   1.901 +void Monitor::lock (Thread * Self) {
   1.902 +#ifdef CHECK_UNHANDLED_OOPS
   1.903 +  // Clear unhandled oops so we get a crash right away.  Only clear for non-vm
   1.904 +  // or GC threads.
   1.905 +  if (Self->is_Java_thread()) {
   1.906 +    Self->clear_unhandled_oops();
   1.907 +  }
   1.908 +#endif // CHECK_UNHANDLED_OOPS
   1.909 +
   1.910 +  debug_only(check_prelock_state(Self));
   1.911 +  assert (_owner != Self              , "invariant") ;
   1.912 +  assert (_OnDeck != Self->_MutexEvent, "invariant") ;
   1.913 +
   1.914 +  if (TryFast()) {
   1.915 + Exeunt:
   1.916 +    assert (ILocked(), "invariant") ;
   1.917 +    assert (owner() == NULL, "invariant");
   1.918 +    set_owner (Self);
   1.919 +    return ;
   1.920 +  }
   1.921 +
   1.922 +  // The lock is contended ...
   1.923 +
   1.924 +  bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint();
   1.925 +  if (can_sneak && _owner == NULL) {
   1.926 +    // a java thread has locked the lock but has not entered the
   1.927 +    // critical region -- let's just pretend we've locked the lock
   1.928 +    // and go on.  we note this with _snuck so we can also
   1.929 +    // pretend to unlock when the time comes.
   1.930 +    _snuck = true;
   1.931 +    goto Exeunt ;
   1.932 +  }
   1.933 +
   1.934 +  // Try a brief spin to avoid passing thru thread state transition ...
   1.935 +  if (TrySpin (Self)) goto Exeunt ;
   1.936 +
   1.937 +  check_block_state(Self);
   1.938 +  if (Self->is_Java_thread()) {
   1.939 +    // Horribile dictu - we suffer through a state transition
   1.940 +    assert(rank() > Mutex::special, "Potential deadlock with special or lesser rank mutex");
   1.941 +    ThreadBlockInVM tbivm ((JavaThread *) Self) ;
   1.942 +    ILock (Self) ;
   1.943 +  } else {
   1.944 +    // Mirabile dictu
   1.945 +    ILock (Self) ;
   1.946 +  }
   1.947 +  goto Exeunt ;
   1.948 +}
   1.949 +
   1.950 +void Monitor::lock() {
   1.951 +  this->lock(Thread::current());
   1.952 +}
   1.953 +
   1.954 +// Lock without safepoint check - a degenerate variant of lock().
   1.955 +// Should ONLY be used by safepoint code and other code
   1.956 +// that is guaranteed not to block while running inside the VM. If this is called with
   1.957 +// thread state set to be in VM, the safepoint synchronization code will deadlock!
   1.958 +
   1.959 +void Monitor::lock_without_safepoint_check (Thread * Self) {
   1.960 +  assert (_owner != Self, "invariant") ;
   1.961 +  ILock (Self) ;
   1.962 +  assert (_owner == NULL, "invariant");
   1.963 +  set_owner (Self);
   1.964 +}
   1.965 +
   1.966 +void Monitor::lock_without_safepoint_check () {
   1.967 +  lock_without_safepoint_check (Thread::current()) ;
   1.968 +}
   1.969 +
   1.970 +
    1.971 +// Returns true if the thread succeeds in grabbing the lock, otherwise false.
   1.972 +
   1.973 +bool Monitor::try_lock() {
   1.974 +  Thread * const Self = Thread::current();
   1.975 +  debug_only(check_prelock_state(Self));
   1.976 +  // assert(!thread->is_inside_signal_handler(), "don't lock inside signal handler");
   1.977 +
   1.978 +  // Special case, where all Java threads are stopped.
   1.979 +  // The lock may have been acquired but _owner is not yet set.
   1.980 +  // In that case the VM thread can safely grab the lock.
   1.981 +  // It strikes me this should appear _after the TryLock() fails, below.
   1.982 +  bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint();
   1.983 +  if (can_sneak && _owner == NULL) {
   1.984 +    set_owner(Self); // Do not need to be atomic, since we are at a safepoint
   1.985 +    _snuck = true;
   1.986 +    return true;
   1.987 +  }
   1.988 +
   1.989 +  if (TryLock()) {
   1.990 +    // We got the lock
   1.991 +    assert (_owner == NULL, "invariant");
   1.992 +    set_owner (Self);
   1.993 +    return true;
   1.994 +  }
   1.995 +  return false;
   1.996 +}
   1.997 +
   1.998 +void Monitor::unlock() {
   1.999 +  assert (_owner  == Thread::current(), "invariant") ;
  1.1000 +  assert (_OnDeck != Thread::current()->_MutexEvent , "invariant") ;
  1.1001 +  set_owner (NULL) ;
  1.1002 +  if (_snuck) {
  1.1003 +    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
  1.1004 +    _snuck = false;
  1.1005 +    return ;
  1.1006 +  }
  1.1007 +  IUnlock (false) ;
  1.1008 +}
  1.1009 +
  1.1010 +// Yet another degenerate version of Monitor::lock() or lock_without_safepoint_check()
  1.1011 +// jvm_raw_lock() and _unlock() can be called by non-Java threads via JVM_RawMonitorEnter.
  1.1012 +//
  1.1013 +// There's no expectation that JVM_RawMonitors will interoperate properly with the native
  1.1014 +// Mutex-Monitor constructs.  We happen to implement JVM_RawMonitors in terms of
  1.1015 +// native Mutex-Monitors simply as a matter of convenience.  A simple abstraction layer
   1.1016 +// over a pthread_mutex_t would work equally well, but would require more platform-specific
   1.1017 +// code -- a "PlatformMutex".  Alternatively, a simple layer over muxAcquire-muxRelease
  1.1018 +// would work too.
  1.1019 +//
  1.1020 +// Since the caller might be a foreign thread, we don't necessarily have a Thread.MutexEvent
  1.1021 +// instance available.  Instead, we transiently allocate a ParkEvent on-demand if
  1.1022 +// we encounter contention.  That ParkEvent remains associated with the thread
  1.1023 +// until it manages to acquire the lock, at which time we return the ParkEvent
  1.1024 +// to the global ParkEvent free list.  This is correct and suffices for our purposes.
  1.1025 +//
  1.1026 +// Beware that the original jvm_raw_unlock() had a "_snuck" test but that
  1.1027 +// jvm_raw_lock() didn't have the corresponding test.  I suspect that's an
  1.1028 +// oversight, but I've replicated the original suspect logic in the new code ...
  1.1029 +
  1.1030 +void Monitor::jvm_raw_lock() {
  1.1031 +  assert(rank() == native, "invariant");
  1.1032 +
  1.1033 +  if (TryLock()) {
  1.1034 + Exeunt:
  1.1035 +    assert (ILocked(), "invariant") ;
  1.1036 +    assert (_owner == NULL, "invariant");
   1.1037 +    // This can potentially be called by non-Java threads. Thus, ThreadLocalStorage::thread()
   1.1038 +    // might return NULL. Don't call set_owner since it will break on a NULL owner.
  1.1039 +    // Consider installing a non-null "ANON" distinguished value instead of just NULL.
  1.1040 +    _owner = ThreadLocalStorage::thread();
  1.1041 +    return ;
  1.1042 +  }
  1.1043 +
  1.1044 +  if (TrySpin(NULL)) goto Exeunt ;
  1.1045 +
  1.1046 +  // slow-path - apparent contention
  1.1047 +  // Allocate a ParkEvent for transient use.
  1.1048 +  // The ParkEvent remains associated with this thread until
  1.1049 +  // the time the thread manages to acquire the lock.
  1.1050 +  ParkEvent * const ESelf = ParkEvent::Allocate(NULL) ;
  1.1051 +  ESelf->reset() ;
  1.1052 +  OrderAccess::storeload() ;
  1.1053 +
  1.1054 +  // Either Enqueue Self on cxq or acquire the outer lock.
  1.1055 +  if (AcquireOrPush (ESelf)) {
  1.1056 +    ParkEvent::Release (ESelf) ;      // surrender the ParkEvent
  1.1057 +    goto Exeunt ;
  1.1058 +  }
  1.1059 +
  1.1060 +  // At any given time there is at most one ondeck thread.
  1.1061 +  // ondeck implies not resident on cxq and not resident on EntryList
  1.1062 +  // Only the OnDeck thread is permitted to try to acquire the contended lock.
  1.1063 +  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  1.1064 +  for (;;) {
  1.1065 +    if (_OnDeck == ESelf && TrySpin(NULL)) break ;
  1.1066 +    ParkCommon (ESelf, 0) ;
  1.1067 +  }
  1.1068 +
  1.1069 +  assert (_OnDeck == ESelf, "invariant") ;
  1.1070 +  _OnDeck = NULL ;
  1.1071 +  ParkEvent::Release (ESelf) ;      // surrender the ParkEvent
  1.1072 +  goto Exeunt ;
  1.1073 +}
  1.1074 +
  1.1075 +void Monitor::jvm_raw_unlock() {
  1.1076 +  // Nearly the same as Monitor::unlock() ...
  1.1077 +  // directly set _owner instead of using set_owner(null)
  1.1078 +  _owner = NULL ;
  1.1079 +  if (_snuck) {         // ???
  1.1080 +    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
  1.1081 +    _snuck = false;
  1.1082 +    return ;
  1.1083 +  }
  1.1084 +  IUnlock(false) ;
  1.1085 +}
  1.1086 +
  1.1087 +bool Monitor::wait(bool no_safepoint_check, long timeout, bool as_suspend_equivalent) {
  1.1088 +  Thread * const Self = Thread::current() ;
  1.1089 +  assert (_owner == Self, "invariant") ;
  1.1090 +  assert (ILocked(), "invariant") ;
  1.1091 +
  1.1092 +  // as_suspend_equivalent logically implies !no_safepoint_check
  1.1093 +  guarantee (!as_suspend_equivalent || !no_safepoint_check, "invariant") ;
  1.1094 +  // !no_safepoint_check logically implies java_thread
  1.1095 +  guarantee (no_safepoint_check || Self->is_Java_thread(), "invariant") ;
  1.1096 +
  1.1097 +  #ifdef ASSERT
  1.1098 +    Monitor * least = get_least_ranked_lock_besides_this(Self->owned_locks());
  1.1099 +    assert(least != this, "Specification of get_least_... call above");
  1.1100 +    if (least != NULL && least->rank() <= special) {
  1.1101 +      tty->print("Attempting to wait on monitor %s/%d while holding"
  1.1102 +                 " lock %s/%d -- possible deadlock",
  1.1103 +                 name(), rank(), least->name(), least->rank());
  1.1104 +      assert(false, "Shouldn't block(wait) while holding a lock of rank special");
  1.1105 +    }
  1.1106 +  #endif // ASSERT
  1.1107 +
  1.1108 +  int wait_status ;
  1.1109 +  // conceptually set the owner to NULL in anticipation of
  1.1110 +  // abdicating the lock in wait
  1.1111 +  set_owner(NULL);
  1.1112 +  if (no_safepoint_check) {
  1.1113 +    wait_status = IWait (Self, timeout) ;
  1.1114 +  } else {
  1.1115 +    assert (Self->is_Java_thread(), "invariant") ;
  1.1116 +    JavaThread *jt = (JavaThread *)Self;
  1.1117 +
  1.1118 +    // Enter safepoint region - ornate and Rococo ...
  1.1119 +    ThreadBlockInVM tbivm(jt);
  1.1120 +    OSThreadWaitState osts(Self->osthread(), false /* not Object.wait() */);
  1.1121 +
  1.1122 +    if (as_suspend_equivalent) {
  1.1123 +      jt->set_suspend_equivalent();
  1.1124 +      // cleared by handle_special_suspend_equivalent_condition() or
  1.1125 +      // java_suspend_self()
  1.1126 +    }
  1.1127 +
  1.1128 +    wait_status = IWait (Self, timeout) ;
  1.1129 +
  1.1130 +    // were we externally suspended while we were waiting?
  1.1131 +    if (as_suspend_equivalent && jt->handle_special_suspend_equivalent_condition()) {
  1.1132 +      // Our event wait has finished and we own the lock, but
  1.1133 +      // while we were waiting another thread suspended us. We don't
  1.1134 +      // want to hold the lock while suspended because that
  1.1135 +      // would surprise the thread that suspended us.
  1.1136 +      assert (ILocked(), "invariant") ;
  1.1137 +      IUnlock (true) ;
  1.1138 +      jt->java_suspend_self();
  1.1139 +      ILock (Self) ;
  1.1140 +      assert (ILocked(), "invariant") ;
  1.1141 +    }
  1.1142 +  }
  1.1143 +
  1.1144 +  // Conceptually reestablish ownership of the lock.
  1.1145 +  // The "real" lock -- the LockByte -- was reacquired by IWait().
  1.1146 +  assert (ILocked(), "invariant") ;
  1.1147 +  assert (_owner == NULL, "invariant") ;
  1.1148 +  set_owner (Self) ;
  1.1149 +  return wait_status != 0 ;          // return true IFF timeout
  1.1150 +}
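         +
         +// Editorial sketch, not part of the original changeset: wait() is normally called in
         +// a condition loop while the monitor is held, e.g. under MutexLockerEx.  The monitor
         +// name "Example_lock" and the predicate are assumptions:
         +//
         +//   MutexLockerEx ml(Example_lock, Mutex::_no_safepoint_check_flag);
         +//   while (!condition_satisfied()) {
         +//     // releases the lock, parks, and reacquires the lock before returning;
         +//     // the return value is true iff the (optional) timeout expired
         +//     Example_lock->wait(Mutex::_no_safepoint_check_flag);
         +//   }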
  1.1151 +
  1.1152 +Monitor::~Monitor() {
  1.1153 +  assert ((UNS(_owner)|UNS(_LockWord.FullWord)|UNS(_EntryList)|UNS(_WaitSet)|UNS(_OnDeck)) == 0, "") ;
  1.1154 +}
  1.1155 +
  1.1156 +void Monitor::ClearMonitor (Monitor * m, const char *name) {
  1.1157 +  m->_owner             = NULL ;
  1.1158 +  m->_snuck             = false ;
  1.1159 +  if (name == NULL) {
  1.1160 +    strcpy(m->_name, "UNKNOWN") ;
  1.1161 +  } else {
  1.1162 +    strncpy(m->_name, name, MONITOR_NAME_LEN - 1);
  1.1163 +    m->_name[MONITOR_NAME_LEN - 1] = '\0';
  1.1164 +  }
  1.1165 +  m->_LockWord.FullWord = 0 ;
  1.1166 +  m->_EntryList         = NULL ;
  1.1167 +  m->_OnDeck            = NULL ;
  1.1168 +  m->_WaitSet           = NULL ;
  1.1169 +  m->_WaitLock[0]       = 0 ;
  1.1170 +}
  1.1171 +
  1.1172 +Monitor::Monitor() { ClearMonitor(this); }
  1.1173 +
  1.1174 +Monitor::Monitor (int Rank, const char * name, bool allow_vm_block) {
  1.1175 +  ClearMonitor (this, name) ;
  1.1176 +#ifdef ASSERT
  1.1177 +  _allow_vm_block  = allow_vm_block;
  1.1178 +  _rank            = Rank ;
  1.1179 +#endif
  1.1180 +}
  1.1181 +
  1.1182 +Mutex::~Mutex() {
  1.1183 +  assert ((UNS(_owner)|UNS(_LockWord.FullWord)|UNS(_EntryList)|UNS(_WaitSet)|UNS(_OnDeck)) == 0, "") ;
  1.1184 +}
  1.1185 +
  1.1186 +Mutex::Mutex (int Rank, const char * name, bool allow_vm_block) {
  1.1187 +  ClearMonitor ((Monitor *) this, name) ;
  1.1188 +#ifdef ASSERT
  1.1189 + _allow_vm_block   = allow_vm_block;
  1.1190 + _rank             = Rank ;
  1.1191 +#endif
  1.1192 +}
  1.1193 +
  1.1194 +bool Monitor::owned_by_self() const {
  1.1195 +  bool ret = _owner == Thread::current();
  1.1196 +  assert (!ret || _LockWord.Bytes[_LSBINDEX] != 0, "invariant") ;
  1.1197 +  return ret;
  1.1198 +}
  1.1199 +
  1.1200 +void Monitor::print_on_error(outputStream* st) const {
  1.1201 +  st->print("[" PTR_FORMAT, this);
  1.1202 +  st->print("] %s", _name);
  1.1203 +  st->print(" - owner thread: " PTR_FORMAT, _owner);
  1.1204 +}
  1.1205 +
  1.1206 +
  1.1207 +
  1.1208 +
  1.1209 +// ----------------------------------------------------------------------------------
  1.1210 +// Non-product code
  1.1211 +
  1.1212 +#ifndef PRODUCT
  1.1213 +void Monitor::print_on(outputStream* st) const {
  1.1214 +  st->print_cr("Mutex: [" PTR_FORMAT "/" PTR_FORMAT "] %s - owner: " PTR_FORMAT, this, _LockWord.FullWord, _name, _owner);
  1.1215 +}
  1.1216 +#endif
  1.1217 +
  1.1218 +#ifndef PRODUCT
  1.1219 +#ifdef ASSERT
  1.1220 +Monitor * Monitor::get_least_ranked_lock(Monitor * locks) {
  1.1221 +  Monitor *res, *tmp;
  1.1222 +  for (res = tmp = locks; tmp != NULL; tmp = tmp->next()) {
  1.1223 +    if (tmp->rank() < res->rank()) {
  1.1224 +      res = tmp;
  1.1225 +    }
  1.1226 +  }
  1.1227 +  if (!SafepointSynchronize::is_at_safepoint()) {
  1.1228 +    // In this case, we expect the held locks to be
  1.1229 +    // in increasing rank order (modulo any native ranks)
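         +    // Editorial note, not part of the original changeset: this holds because
         +    // set_owner_implementation() below pushes each newly acquired lock onto the head
         +    // of Thread::_owned_locks, and the rank rules force each new acquisition to have
         +    // a strictly lower rank than any lock already held (modulo the exceptions noted
         +    // in set_owner_implementation), so a head-to-tail walk sees non-decreasing ranks.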
  1.1230 +    for (tmp = locks; tmp != NULL; tmp = tmp->next()) {
  1.1231 +      if (tmp->next() != NULL) {
  1.1232 +        assert(tmp->rank() == Mutex::native ||
  1.1233 +               tmp->rank() <= tmp->next()->rank(), "mutex rank anomaly?");
  1.1234 +      }
  1.1235 +    }
  1.1236 +  }
  1.1237 +  return res;
  1.1238 +}
  1.1239 +
  1.1240 +Monitor* Monitor::get_least_ranked_lock_besides_this(Monitor* locks) {
  1.1241 +  Monitor *res, *tmp;
  1.1242 +  for (res = NULL, tmp = locks; tmp != NULL; tmp = tmp->next()) {
  1.1243 +    if (tmp != this && (res == NULL || tmp->rank() < res->rank())) {
  1.1244 +      res = tmp;
  1.1245 +    }
  1.1246 +  }
  1.1247 +  if (!SafepointSynchronize::is_at_safepoint()) {
  1.1248 +    // In this case, we expect the held locks to be
  1.1249 +    // in increasing rank order (modulo any native ranks)
  1.1250 +    for (tmp = locks; tmp != NULL; tmp = tmp->next()) {
  1.1251 +      if (tmp->next() != NULL) {
  1.1252 +        assert(tmp->rank() == Mutex::native ||
  1.1253 +               tmp->rank() <= tmp->next()->rank(), "mutex rank anomaly?");
  1.1254 +      }
  1.1255 +    }
  1.1256 +  }
  1.1257 +  return res;
  1.1258 +}
  1.1259 +
  1.1260 +
  1.1261 +bool Monitor::contains(Monitor* locks, Monitor * lock) {
  1.1262 +  for (; locks != NULL; locks = locks->next()) {
  1.1263 +    if (locks == lock)
  1.1264 +      return true;
  1.1265 +  }
  1.1266 +  return false;
  1.1267 +}
  1.1268 +#endif
  1.1269 +
  1.1270 +// Called immediately after lock acquisition or release as a diagnostic
  1.1271 +// to track the lock-set of the thread and test for rank violations that
  1.1272 +// might indicate exposure to deadlock.
  1.1273 +// Rather like an EventListener for _owner (:>).
  1.1274 +
  1.1275 +void Monitor::set_owner_implementation(Thread *new_owner) {
  1.1276 +  // This function is solely responsible for maintaining
  1.1277 +  // and checking the invariant that threads and locks
  1.1278 +  // are in a 1/N relation, with some locks unowned.
  1.1279 +  // It uses the Mutex::_owner, Mutex::_next, and
  1.1280 +  // Thread::_owned_locks fields, and no other function
  1.1281 +  // changes those fields.
  1.1282 +  // It is illegal to set the mutex from one non-NULL
  1.1283 +  // owner to another -- it must be owned by NULL as an
  1.1284 +  // intermediate state.
  1.1285 +
  1.1286 +  if (new_owner != NULL) {
  1.1287 +    // the thread is acquiring this lock
  1.1288 +
  1.1289 +    assert(new_owner == Thread::current(), "Should I be doing this?");
  1.1290 +    assert(_owner == NULL, "setting the owner thread of an already owned mutex");
  1.1291 +    _owner = new_owner; // set the owner
  1.1292 +
  1.1293 +    // link "this" into the owned locks list
  1.1294 +
  1.1295 +    #ifdef ASSERT  // Thread::_owned_locks is under the same ifdef
  1.1296 +      Monitor* locks = get_least_ranked_lock(new_owner->owned_locks());
  1.1297 +                    // Mutex::set_owner_implementation is a friend of Thread
  1.1298 +
  1.1299 +      assert(this->rank() >= 0, "bad lock rank");
  1.1300 +
  1.1301 +      // Deadlock avoidance rules require us to acquire Mutexes only in
  1.1302 +      // a global total order. For example, if m1 is the lowest-ranked mutex
  1.1303 +      // that the thread holds and m2 is the mutex the thread is trying
  1.1304 +      // to acquire, then the deadlock avoidance rules require that the rank
  1.1305 +      // of m2 be less than the rank of m1.
  1.1306 +      // The rank Mutex::native is an exception in that it is not subject
  1.1307 +      // to the verification rules.
  1.1308 +      // Here are some further notes relating to mutex acquisition anomalies:
  1.1309 +      // . under Solaris, the interrupt lock gets acquired when doing
  1.1310 +      //   profiling, so any lock could be held.
  1.1311 +      // . it is also ok to acquire Safepoint_lock at the very end while we
  1.1312 +      //   already hold Terminator_lock - may happen because of periodic safepoints
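         +      // Editorial sketch, not part of the original changeset: an illustration of the
         +      // rank rule with hypothetical locks (the names are assumptions; the ranks are
         +      // the Mutex rank constants, with special < leaf):
         +      //
         +      //   Monitor * A_lock = new Monitor(Mutex::leaf,    "A_lock", true);
         +      //   Monitor * B_lock = new Monitor(Mutex::special, "B_lock", true);
         +      //   A_lock->lock(); B_lock->lock();    // OK: ranks strictly decrease
         +      //   // Acquiring A_lock while already holding only B_lock would trip the
         +      //   // fatal() below, because special <= leaf.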
  1.1313 +      if (this->rank() != Mutex::native &&
  1.1314 +          this->rank() != Mutex::suspend_resume &&
  1.1315 +          locks != NULL && locks->rank() <= this->rank() &&
  1.1316 +          !SafepointSynchronize::is_at_safepoint() &&
  1.1317 +          this != Interrupt_lock && this != ProfileVM_lock &&
  1.1318 +          !(this == Safepoint_lock && contains(locks, Terminator_lock) &&
  1.1319 +            SafepointSynchronize::is_synchronizing())) {
  1.1320 +        new_owner->print_owned_locks();
  1.1321 +        fatal(err_msg("acquiring lock %s/%d out of order with lock %s/%d -- "
  1.1322 +                      "possible deadlock", this->name(), this->rank(),
  1.1323 +                      locks->name(), locks->rank()));
  1.1324 +      }
  1.1325 +
  1.1326 +      this->_next = new_owner->_owned_locks;
  1.1327 +      new_owner->_owned_locks = this;
  1.1328 +    #endif
  1.1329 +
  1.1330 +  } else {
  1.1331 +    // the thread is releasing this lock
  1.1332 +
  1.1333 +    Thread* old_owner = _owner;
  1.1334 +    debug_only(_last_owner = old_owner);
  1.1335 +
  1.1336 +    assert(old_owner != NULL, "removing the owner thread of an unowned mutex");
  1.1337 +    assert(old_owner == Thread::current(), "removing the owner thread of a mutex not owned by the current thread");
  1.1338 +
  1.1339 +    _owner = NULL; // clear the owner
  1.1340 +
  1.1341 +    #ifdef ASSERT
  1.1342 +      Monitor *locks = old_owner->owned_locks();
  1.1343 +
  1.1344 +      // remove "this" from the owned locks list
  1.1345 +
  1.1346 +      Monitor *prev = NULL;
  1.1347 +      bool found = false;
  1.1348 +      for (; locks != NULL; prev = locks, locks = locks->next()) {
  1.1349 +        if (locks == this) {
  1.1350 +          found = true;
  1.1351 +          break;
  1.1352 +        }
  1.1353 +      }
  1.1354 +      assert(found, "Removing a lock not owned");
  1.1355 +      if (prev == NULL) {
  1.1356 +        old_owner->_owned_locks = _next;
  1.1357 +      } else {
  1.1358 +        prev->_next = _next;
  1.1359 +      }
  1.1360 +      _next = NULL;
  1.1361 +    #endif
  1.1362 +  }
  1.1363 +}
  1.1364 +
  1.1365 +
  1.1366 +// Factored-out common sanity checks for locking mutexes.  Used by lock() and try_lock().
  1.1367 +void Monitor::check_prelock_state(Thread *thread) {
  1.1368 +  assert((!thread->is_Java_thread() || ((JavaThread *)thread)->thread_state() == _thread_in_vm)
  1.1369 +         || rank() == Mutex::special, "wrong thread state for using locks");
  1.1370 +  if (StrictSafepointChecks) {
  1.1371 +    if (thread->is_VM_thread() && !allow_vm_block()) {
  1.1372 +      fatal(err_msg("VM thread using lock %s (not allowed to block on)",
  1.1373 +                    name()));
  1.1374 +    }
  1.1375 +    debug_only(if (rank() != Mutex::special) \
  1.1376 +      thread->check_for_valid_safepoint_state(false);)
  1.1377 +  }
  1.1378 +  if (thread->is_Watcher_thread()) {
  1.1379 +    assert(!WatcherThread::watcher_thread()->has_crash_protection(),
  1.1380 +        "locking not allowed when crash protection is set");
  1.1381 +  }
  1.1382 +}
  1.1383 +
  1.1384 +void Monitor::check_block_state(Thread *thread) {
  1.1385 +  if (!_allow_vm_block && thread->is_VM_thread()) {
  1.1386 +    warning("VM thread blocked on lock");
  1.1387 +    print();
  1.1388 +    BREAKPOINT;
  1.1389 +  }
  1.1390 +  assert(_owner != thread, "deadlock: blocking on monitor owned by current thread");
  1.1391 +}
  1.1392 +
  1.1393 +#endif // PRODUCT
