1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/runtime/mutex.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,1356 @@ 1.4 + 1.5 +/* 1.6 + * Copyright 1998-2005 Sun Microsystems, Inc. All Rights Reserved. 1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 + * 1.9 + * This code is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License version 2 only, as 1.11 + * published by the Free Software Foundation. 1.12 + * 1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.16 + * version 2 for more details (a copy is included in the LICENSE file that 1.17 + * accompanied this code). 1.18 + * 1.19 + * You should have received a copy of the GNU General Public License version 1.20 + * 2 along with this work; if not, write to the Free Software Foundation, 1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.22 + * 1.23 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.24 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.25 + * have any questions. 1.26 + * 1.27 + */ 1.28 + 1.29 +# include "incls/_precompiled.incl" 1.30 +# include "incls/_mutex.cpp.incl" 1.31 + 1.32 +// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o 1.33 +// 1.34 +// Native Monitor-Mutex locking - theory of operations 1.35 +// 1.36 +// * Native Monitors are completely unrelated to Java-level monitors, 1.37 +// although the "back-end" slow-path implementations share a common lineage. 1.38 +// See objectMonitor:: in synchronizer.cpp. 1.39 +// Native Monitors do *not* support nesting or recursion but otherwise 1.40 +// they're basically Hoare-flavor monitors. 1.41 +// 1.42 +// * A thread acquires ownership of a Monitor/Mutex by CASing the LockByte 1.43 +// in the _LockWord from zero to non-zero. Note that the _Owner field 1.44 +// is advisory and is used only to verify that the thread calling unlock() 1.45 +// is indeed the last thread to have acquired the lock. 1.46 +// 1.47 +// * Contending threads "push" themselves onto the front of the contention 1.48 +// queue -- called the cxq -- with CAS and then spin/park. 1.49 +// The _LockWord contains the LockByte as well as the pointer to the head 1.50 +// of the cxq. Colocating the LockByte with the cxq precludes certain races. 1.51 +// 1.52 +// * Using a separately addressable LockByte allows for CAS:MEMBAR or CAS:0 1.53 +// idioms. We currently use MEMBAR in the uncontended unlock() path, as 1.54 +// MEMBAR often has less latency than CAS. If warranted, we could switch to 1.55 +// a CAS:0 mode, using timers to close the resultant race, as is done 1.56 +// with Java Monitors in synchronizer.cpp. 1.57 +// 1.58 +// See the following for a discussion of the relative cost of atomics (CAS) 1.59 +// MEMBAR, and ways to eliminate such instructions from the common-case paths: 1.60 +// -- http://blogs.sun.com/dave/entry/biased_locking_in_hotspot 1.61 +// -- http://blogs.sun.com/dave/resource/MustangSync.pdf 1.62 +// -- http://blogs.sun.com/dave/resource/synchronization-public2.pdf 1.63 +// -- synchronizer.cpp 1.64 +// 1.65 +// * Overall goals - desiderata 1.66 +// 1. Minimize context switching 1.67 +// 2. Minimize lock migration 1.68 +// 3. Minimize CPI -- affinity and locality 1.69 +// 4. Minimize the execution of high-latency instructions such as CAS or MEMBAR 1.70 +// 5. Minimize outer lock hold times 1.71 +// 6. Behave gracefully on a loaded system 1.72 +// 1.73 +// * Thread flow and list residency: 1.74 +// 1.75 +// Contention queue --> EntryList --> OnDeck --> Owner --> !Owner 1.76 +// [..resident on monitor list..] 1.77 +// [...........contending..................] 1.78 +// 1.79 +// -- The contention queue (cxq) contains recently-arrived threads (RATs). 1.80 +// Threads on the cxq eventually drain into the EntryList. 1.81 +// -- Invariant: a thread appears on at most one list -- cxq, EntryList 1.82 +// or WaitSet -- at any one time. 1.83 +// -- For a given monitor there can be at most one "OnDeck" thread at any 1.84 +// given time but if needbe this particular invariant could be relaxed. 1.85 +// 1.86 +// * The WaitSet and EntryList linked lists are composed of ParkEvents. 1.87 +// I use ParkEvent instead of threads as ParkEvents are immortal and 1.88 +// type-stable, meaning we can safely unpark() a possibly stale 1.89 +// list element in the unlock()-path. (That's benign). 1.90 +// 1.91 +// * Succession policy - providing for progress: 1.92 +// 1.93 +// As necessary, the unlock()ing thread identifies, unlinks, and unparks 1.94 +// an "heir presumptive" tentative successor thread from the EntryList. 1.95 +// This becomes the so-called "OnDeck" thread, of which there can be only 1.96 +// one at any given time for a given monitor. The wakee will recontend 1.97 +// for ownership of monitor. 1.98 +// 1.99 +// Succession is provided for by a policy of competitive handoff. 1.100 +// The exiting thread does _not_ grant or pass ownership to the 1.101 +// successor thread. (This is also referred to as "handoff" succession"). 1.102 +// Instead the exiting thread releases ownership and possibly wakes 1.103 +// a successor, so the successor can (re)compete for ownership of the lock. 1.104 +// 1.105 +// Competitive handoff provides excellent overall throughput at the expense 1.106 +// of short-term fairness. If fairness is a concern then one remedy might 1.107 +// be to add an AcquireCounter field to the monitor. After a thread acquires 1.108 +// the lock it will decrement the AcquireCounter field. When the count 1.109 +// reaches 0 the thread would reset the AcquireCounter variable, abdicate 1.110 +// the lock directly to some thread on the EntryList, and then move itself to the 1.111 +// tail of the EntryList. 1.112 +// 1.113 +// But in practice most threads engage or otherwise participate in resource 1.114 +// bounded producer-consumer relationships, so lock domination is not usually 1.115 +// a practical concern. Recall too, that in general it's easier to construct 1.116 +// a fair lock from a fast lock, but not vice-versa. 1.117 +// 1.118 +// * The cxq can have multiple concurrent "pushers" but only one concurrent 1.119 +// detaching thread. This mechanism is immune from the ABA corruption. 1.120 +// More precisely, the CAS-based "push" onto cxq is ABA-oblivious. 1.121 +// We use OnDeck as a pseudo-lock to enforce the at-most-one detaching 1.122 +// thread constraint. 1.123 +// 1.124 +// * Taken together, the cxq and the EntryList constitute or form a 1.125 +// single logical queue of threads stalled trying to acquire the lock. 1.126 +// We use two distinct lists to reduce heat on the list ends. 1.127 +// Threads in lock() enqueue onto cxq while threads in unlock() will 1.128 +// dequeue from the EntryList. (c.f. Michael Scott's "2Q" algorithm). 1.129 +// A key desideratum is to minimize queue & monitor metadata manipulation 1.130 +// that occurs while holding the "outer" monitor lock -- that is, we want to 1.131 +// minimize monitor lock holds times. 1.132 +// 1.133 +// The EntryList is ordered by the prevailing queue discipline and 1.134 +// can be organized in any convenient fashion, such as a doubly-linked list or 1.135 +// a circular doubly-linked list. If we need a priority queue then something akin 1.136 +// to Solaris' sleepq would work nicely. Viz., 1.137 +// -- http://agg.eng/ws/on10_nightly/source/usr/src/uts/common/os/sleepq.c. 1.138 +// -- http://cvs.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/os/sleepq.c 1.139 +// Queue discipline is enforced at ::unlock() time, when the unlocking thread 1.140 +// drains the cxq into the EntryList, and orders or reorders the threads on the 1.141 +// EntryList accordingly. 1.142 +// 1.143 +// Barring "lock barging", this mechanism provides fair cyclic ordering, 1.144 +// somewhat similar to an elevator-scan. 1.145 +// 1.146 +// * OnDeck 1.147 +// -- For a given monitor there can be at most one OnDeck thread at any given 1.148 +// instant. The OnDeck thread is contending for the lock, but has been 1.149 +// unlinked from the EntryList and cxq by some previous unlock() operations. 1.150 +// Once a thread has been designated the OnDeck thread it will remain so 1.151 +// until it manages to acquire the lock -- being OnDeck is a stable property. 1.152 +// -- Threads on the EntryList or cxq are _not allowed to attempt lock acquisition. 1.153 +// -- OnDeck also serves as an "inner lock" as follows. Threads in unlock() will, after 1.154 +// having cleared the LockByte and dropped the outer lock, attempt to "trylock" 1.155 +// OnDeck by CASing the field from null to non-null. If successful, that thread 1.156 +// is then responsible for progress and succession and can use CAS to detach and 1.157 +// drain the cxq into the EntryList. By convention, only this thread, the holder of 1.158 +// the OnDeck inner lock, can manipulate the EntryList or detach and drain the 1.159 +// RATs on the cxq into the EntryList. This avoids ABA corruption on the cxq as 1.160 +// we allow multiple concurrent "push" operations but restrict detach concurrency 1.161 +// to at most one thread. Having selected and detached a successor, the thread then 1.162 +// changes the OnDeck to refer to that successor, and then unparks the successor. 1.163 +// That successor will eventually acquire the lock and clear OnDeck. Beware 1.164 +// that the OnDeck usage as a lock is asymmetric. A thread in unlock() transiently 1.165 +// "acquires" OnDeck, performs queue manipulations, passes OnDeck to some successor, 1.166 +// and then the successor eventually "drops" OnDeck. Note that there's never 1.167 +// any sense of contention on the inner lock, however. Threads never contend 1.168 +// or wait for the inner lock. 1.169 +// -- OnDeck provides for futile wakeup throttling a described in section 3.3 of 1.170 +// See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf 1.171 +// In a sense, OnDeck subsumes the ObjectMonitor _Succ and ObjectWaiter 1.172 +// TState fields found in Java-level objectMonitors. (See synchronizer.cpp). 1.173 +// 1.174 +// * Waiting threads reside on the WaitSet list -- wait() puts 1.175 +// the caller onto the WaitSet. Notify() or notifyAll() simply 1.176 +// transfers threads from the WaitSet to either the EntryList or cxq. 1.177 +// Subsequent unlock() operations will eventually unpark the notifyee. 1.178 +// Unparking a notifee in notify() proper is inefficient - if we were to do so 1.179 +// it's likely the notifyee would simply impale itself on the lock held 1.180 +// by the notifier. 1.181 +// 1.182 +// * The mechanism is obstruction-free in that if the holder of the transient 1.183 +// OnDeck lock in unlock() is preempted or otherwise stalls, other threads 1.184 +// can still acquire and release the outer lock and continue to make progress. 1.185 +// At worst, waking of already blocked contending threads may be delayed, 1.186 +// but nothing worse. (We only use "trylock" operations on the inner OnDeck 1.187 +// lock). 1.188 +// 1.189 +// * Note that thread-local storage must be initialized before a thread 1.190 +// uses Native monitors or mutexes. The native monitor-mutex subsystem 1.191 +// depends on Thread::current(). 1.192 +// 1.193 +// * The monitor synchronization subsystem avoids the use of native 1.194 +// synchronization primitives except for the narrow platform-specific 1.195 +// park-unpark abstraction. See the comments in os_solaris.cpp regarding 1.196 +// the semantics of park-unpark. Put another way, this monitor implementation 1.197 +// depends only on atomic operations and park-unpark. The monitor subsystem 1.198 +// manages all RUNNING->BLOCKED and BLOCKED->READY transitions while the 1.199 +// underlying OS manages the READY<->RUN transitions. 1.200 +// 1.201 +// * The memory consistency model provide by lock()-unlock() is at least as 1.202 +// strong or stronger than the Java Memory model defined by JSR-133. 1.203 +// That is, we guarantee at least entry consistency, if not stronger. 1.204 +// See http://g.oswego.edu/dl/jmm/cookbook.html. 1.205 +// 1.206 +// * Thread:: currently contains a set of purpose-specific ParkEvents: 1.207 +// _MutexEvent, _ParkEvent, etc. A better approach might be to do away with 1.208 +// the purpose-specific ParkEvents and instead implement a general per-thread 1.209 +// stack of available ParkEvents which we could provision on-demand. The 1.210 +// stack acts as a local cache to avoid excessive calls to ParkEvent::Allocate() 1.211 +// and ::Release(). A thread would simply pop an element from the local stack before it 1.212 +// enqueued or park()ed. When the contention was over the thread would 1.213 +// push the no-longer-needed ParkEvent back onto its stack. 1.214 +// 1.215 +// * A slightly reduced form of ILock() and IUnlock() have been partially 1.216 +// model-checked (Murphi) for safety and progress at T=1,2,3 and 4. 1.217 +// It'd be interesting to see if TLA/TLC could be useful as well. 1.218 +// 1.219 +// * Mutex-Monitor is a low-level "leaf" subsystem. That is, the monitor 1.220 +// code should never call other code in the JVM that might itself need to 1.221 +// acquire monitors or mutexes. That's true *except* in the case of the 1.222 +// ThreadBlockInVM state transition wrappers. The ThreadBlockInVM DTOR handles 1.223 +// mutator reentry (ingress) by checking for a pending safepoint in which case it will 1.224 +// call SafepointSynchronize::block(), which in turn may call Safepoint_lock->lock(), etc. 1.225 +// In that particular case a call to lock() for a given Monitor can end up recursively 1.226 +// calling lock() on another monitor. While distasteful, this is largely benign 1.227 +// as the calls come from jacket that wraps lock(), and not from deep within lock() itself. 1.228 +// 1.229 +// It's unfortunate that native mutexes and thread state transitions were convolved. 1.230 +// They're really separate concerns and should have remained that way. Melding 1.231 +// them together was facile -- a bit too facile. The current implementation badly 1.232 +// conflates the two concerns. 1.233 +// 1.234 +// * TODO-FIXME: 1.235 +// 1.236 +// -- Add DTRACE probes for contended acquire, contended acquired, contended unlock 1.237 +// We should also add DTRACE probes in the ParkEvent subsystem for 1.238 +// Park-entry, Park-exit, and Unpark. 1.239 +// 1.240 +// -- We have an excess of mutex-like constructs in the JVM, namely: 1.241 +// 1. objectMonitors for Java-level synchronization (synchronizer.cpp) 1.242 +// 2. low-level muxAcquire and muxRelease 1.243 +// 3. low-level spinAcquire and spinRelease 1.244 +// 4. native Mutex:: and Monitor:: 1.245 +// 5. jvm_raw_lock() and _unlock() 1.246 +// 6. JVMTI raw monitors -- distinct from (5) despite having a confusingly 1.247 +// similar name. 1.248 +// 1.249 +// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o 1.250 + 1.251 + 1.252 +// CASPTR() uses the canonical argument order that dominates in the literature. 1.253 +// Our internal cmpxchg_ptr() uses a bastardized ordering to accommodate Sun .il templates. 1.254 + 1.255 +#define CASPTR(a,c,s) intptr_t(Atomic::cmpxchg_ptr ((void *)(s),(void *)(a),(void *)(c))) 1.256 +#define UNS(x) (uintptr_t(x)) 1.257 +#define TRACE(m) { static volatile int ctr = 0 ; int x = ++ctr ; if ((x & (x-1))==0) { ::printf ("%d:%s\n", x, #m); ::fflush(stdout); }} 1.258 + 1.259 +// Simplistic low-quality Marsaglia SHIFT-XOR RNG. 1.260 +// Bijective except for the trailing mask operation. 1.261 +// Useful for spin loops as the compiler can't optimize it away. 1.262 + 1.263 +static inline jint MarsagliaXORV (jint x) { 1.264 + if (x == 0) x = 1|os::random() ; 1.265 + x ^= x << 6; 1.266 + x ^= ((unsigned)x) >> 21; 1.267 + x ^= x << 7 ; 1.268 + return x & 0x7FFFFFFF ; 1.269 +} 1.270 + 1.271 +static inline jint MarsagliaXOR (jint * const a) { 1.272 + jint x = *a ; 1.273 + if (x == 0) x = UNS(a)|1 ; 1.274 + x ^= x << 6; 1.275 + x ^= ((unsigned)x) >> 21; 1.276 + x ^= x << 7 ; 1.277 + *a = x ; 1.278 + return x & 0x7FFFFFFF ; 1.279 +} 1.280 + 1.281 +static int Stall (int its) { 1.282 + static volatile jint rv = 1 ; 1.283 + volatile int OnFrame = 0 ; 1.284 + jint v = rv ^ UNS(OnFrame) ; 1.285 + while (--its >= 0) { 1.286 + v = MarsagliaXORV (v) ; 1.287 + } 1.288 + // Make this impossible for the compiler to optimize away, 1.289 + // but (mostly) avoid W coherency sharing on MP systems. 1.290 + if (v == 0x12345) rv = v ; 1.291 + return v ; 1.292 +} 1.293 + 1.294 +int Monitor::TryLock () { 1.295 + intptr_t v = _LockWord.FullWord ; 1.296 + for (;;) { 1.297 + if ((v & _LBIT) != 0) return 0 ; 1.298 + const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ; 1.299 + if (v == u) return 1 ; 1.300 + v = u ; 1.301 + } 1.302 +} 1.303 + 1.304 +int Monitor::TryFast () { 1.305 + // Optimistic fast-path form ... 1.306 + // Fast-path attempt for the common uncontended case. 1.307 + // Avoid RTS->RTO $ coherence upgrade on typical SMP systems. 1.308 + intptr_t v = CASPTR (&_LockWord, 0, _LBIT) ; // agro ... 1.309 + if (v == 0) return 1 ; 1.310 + 1.311 + for (;;) { 1.312 + if ((v & _LBIT) != 0) return 0 ; 1.313 + const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ; 1.314 + if (v == u) return 1 ; 1.315 + v = u ; 1.316 + } 1.317 +} 1.318 + 1.319 +int Monitor::ILocked () { 1.320 + const intptr_t w = _LockWord.FullWord & 0xFF ; 1.321 + assert (w == 0 || w == _LBIT, "invariant") ; 1.322 + return w == _LBIT ; 1.323 +} 1.324 + 1.325 +// Polite TATAS spinlock with exponential backoff - bounded spin. 1.326 +// Ideally we'd use processor cycles, time or vtime to control 1.327 +// the loop, but we currently use iterations. 1.328 +// All the constants within were derived empirically but work over 1.329 +// over the spectrum of J2SE reference platforms. 1.330 +// On Niagara-class systems the back-off is unnecessary but 1.331 +// is relatively harmless. (At worst it'll slightly retard 1.332 +// acquisition times). The back-off is critical for older SMP systems 1.333 +// where constant fetching of the LockWord would otherwise impair 1.334 +// scalability. 1.335 +// 1.336 +// Clamp spinning at approximately 1/2 of a context-switch round-trip. 1.337 +// See synchronizer.cpp for details and rationale. 1.338 + 1.339 +int Monitor::TrySpin (Thread * const Self) { 1.340 + if (TryLock()) return 1 ; 1.341 + if (!os::is_MP()) return 0 ; 1.342 + 1.343 + int Probes = 0 ; 1.344 + int Delay = 0 ; 1.345 + int Steps = 0 ; 1.346 + int SpinMax = NativeMonitorSpinLimit ; 1.347 + int flgs = NativeMonitorFlags ; 1.348 + for (;;) { 1.349 + intptr_t v = _LockWord.FullWord; 1.350 + if ((v & _LBIT) == 0) { 1.351 + if (CASPTR (&_LockWord, v, v|_LBIT) == v) { 1.352 + return 1 ; 1.353 + } 1.354 + continue ; 1.355 + } 1.356 + 1.357 + if ((flgs & 8) == 0) { 1.358 + SpinPause () ; 1.359 + } 1.360 + 1.361 + // Periodically increase Delay -- variable Delay form 1.362 + // conceptually: delay *= 1 + 1/Exponent 1.363 + ++ Probes; 1.364 + if (Probes > SpinMax) return 0 ; 1.365 + 1.366 + if ((Probes & 0x7) == 0) { 1.367 + Delay = ((Delay << 1)|1) & 0x7FF ; 1.368 + // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ; 1.369 + } 1.370 + 1.371 + if (flgs & 2) continue ; 1.372 + 1.373 + // Consider checking _owner's schedctl state, if OFFPROC abort spin. 1.374 + // If the owner is OFFPROC then it's unlike that the lock will be dropped 1.375 + // in a timely fashion, which suggests that spinning would not be fruitful 1.376 + // or profitable. 1.377 + 1.378 + // Stall for "Delay" time units - iterations in the current implementation. 1.379 + // Avoid generating coherency traffic while stalled. 1.380 + // Possible ways to delay: 1.381 + // PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt, 1.382 + // wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ... 1.383 + // Note that on Niagara-class systems we want to minimize STs in the 1.384 + // spin loop. N1 and brethren write-around the L1$ over the xbar into the L2$. 1.385 + // Furthermore, they don't have a W$ like traditional SPARC processors. 1.386 + // We currently use a Marsaglia Shift-Xor RNG loop. 1.387 + Steps += Delay ; 1.388 + if (Self != NULL) { 1.389 + jint rv = Self->rng[0] ; 1.390 + for (int k = Delay ; --k >= 0; ) { 1.391 + rv = MarsagliaXORV (rv) ; 1.392 + if ((flgs & 4) == 0 && SafepointSynchronize::do_call_back()) return 0 ; 1.393 + } 1.394 + Self->rng[0] = rv ; 1.395 + } else { 1.396 + Stall (Delay) ; 1.397 + } 1.398 + } 1.399 +} 1.400 + 1.401 +static int ParkCommon (ParkEvent * ev, jlong timo) { 1.402 + // Diagnostic support - periodically unwedge blocked threads 1.403 + intx nmt = NativeMonitorTimeout ; 1.404 + if (nmt > 0 && (nmt < timo || timo <= 0)) { 1.405 + timo = nmt ; 1.406 + } 1.407 + int err = OS_OK ; 1.408 + if (0 == timo) { 1.409 + ev->park() ; 1.410 + } else { 1.411 + err = ev->park(timo) ; 1.412 + } 1.413 + return err ; 1.414 +} 1.415 + 1.416 +inline int Monitor::AcquireOrPush (ParkEvent * ESelf) { 1.417 + intptr_t v = _LockWord.FullWord ; 1.418 + for (;;) { 1.419 + if ((v & _LBIT) == 0) { 1.420 + const intptr_t u = CASPTR (&_LockWord, v, v|_LBIT) ; 1.421 + if (u == v) return 1 ; // indicate acquired 1.422 + v = u ; 1.423 + } else { 1.424 + // Anticipate success ... 1.425 + ESelf->ListNext = (ParkEvent *) (v & ~_LBIT) ; 1.426 + const intptr_t u = CASPTR (&_LockWord, v, intptr_t(ESelf)|_LBIT) ; 1.427 + if (u == v) return 0 ; // indicate pushed onto cxq 1.428 + v = u ; 1.429 + } 1.430 + // Interference - LockWord change - just retry 1.431 + } 1.432 +} 1.433 + 1.434 +// ILock and IWait are the lowest level primitive internal blocking 1.435 +// synchronization functions. The callers of IWait and ILock must have 1.436 +// performed any needed state transitions beforehand. 1.437 +// IWait and ILock may directly call park() without any concern for thread state. 1.438 +// Note that ILock and IWait do *not* access _owner. 1.439 +// _owner is a higher-level logical concept. 1.440 + 1.441 +void Monitor::ILock (Thread * Self) { 1.442 + assert (_OnDeck != Self->_MutexEvent, "invariant") ; 1.443 + 1.444 + if (TryFast()) { 1.445 + Exeunt: 1.446 + assert (ILocked(), "invariant") ; 1.447 + return ; 1.448 + } 1.449 + 1.450 + ParkEvent * const ESelf = Self->_MutexEvent ; 1.451 + assert (_OnDeck != ESelf, "invariant") ; 1.452 + 1.453 + // As an optimization, spinners could conditionally try to set ONDECK to _LBIT 1.454 + // Synchronizer.cpp uses a similar optimization. 1.455 + if (TrySpin (Self)) goto Exeunt ; 1.456 + 1.457 + // Slow-path - the lock is contended. 1.458 + // Either Enqueue Self on cxq or acquire the outer lock. 1.459 + // LockWord encoding = (cxq,LOCKBYTE) 1.460 + ESelf->reset() ; 1.461 + OrderAccess::fence() ; 1.462 + 1.463 + // Optional optimization ... try barging on the inner lock 1.464 + if ((NativeMonitorFlags & 32) && CASPTR (&_OnDeck, NULL, UNS(Self)) == 0) { 1.465 + goto OnDeck_LOOP ; 1.466 + } 1.467 + 1.468 + if (AcquireOrPush (ESelf)) goto Exeunt ; 1.469 + 1.470 + // At any given time there is at most one ondeck thread. 1.471 + // ondeck implies not resident on cxq and not resident on EntryList 1.472 + // Only the OnDeck thread can try to acquire -- contended for -- the lock. 1.473 + // CONSIDER: use Self->OnDeck instead of m->OnDeck. 1.474 + // Deschedule Self so that others may run. 1.475 + while (_OnDeck != ESelf) { 1.476 + ParkCommon (ESelf, 0) ; 1.477 + } 1.478 + 1.479 + // Self is now in the ONDECK position and will remain so until it 1.480 + // manages to acquire the lock. 1.481 + OnDeck_LOOP: 1.482 + for (;;) { 1.483 + assert (_OnDeck == ESelf, "invariant") ; 1.484 + if (TrySpin (Self)) break ; 1.485 + // CONSIDER: if ESelf->TryPark() && TryLock() break ... 1.486 + // It's probably wise to spin only if we *actually* blocked 1.487 + // CONSIDER: check the lockbyte, if it remains set then 1.488 + // preemptively drain the cxq into the EntryList. 1.489 + // The best place and time to perform queue operations -- lock metadata -- 1.490 + // is _before having acquired the outer lock, while waiting for the lock to drop. 1.491 + ParkCommon (ESelf, 0) ; 1.492 + } 1.493 + 1.494 + assert (_OnDeck == ESelf, "invariant") ; 1.495 + _OnDeck = NULL ; 1.496 + 1.497 + // Note that we current drop the inner lock (clear OnDeck) in the slow-path 1.498 + // epilog immediately after having acquired the outer lock. 1.499 + // But instead we could consider the following optimizations: 1.500 + // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation. 1.501 + // This might avoid potential reacquisition of the inner lock in IUlock(). 1.502 + // B. While still holding the inner lock, attempt to opportunistically select 1.503 + // and unlink the next ONDECK thread from the EntryList. 1.504 + // If successful, set ONDECK to refer to that thread, otherwise clear ONDECK. 1.505 + // It's critical that the select-and-unlink operation run in constant-time as 1.506 + // it executes when holding the outer lock and may artificially increase the 1.507 + // effective length of the critical section. 1.508 + // Note that (A) and (B) are tantamount to succession by direct handoff for 1.509 + // the inner lock. 1.510 + goto Exeunt ; 1.511 +} 1.512 + 1.513 +void Monitor::IUnlock (bool RelaxAssert) { 1.514 + assert (ILocked(), "invariant") ; 1.515 + _LockWord.Bytes[_LSBINDEX] = 0 ; // drop outer lock 1.516 + OrderAccess::storeload (); 1.517 + ParkEvent * const w = _OnDeck ; 1.518 + assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ; 1.519 + if (w != NULL) { 1.520 + // Either we have a valid ondeck thread or ondeck is transiently "locked" 1.521 + // by some exiting thread as it arranges for succession. The LSBit of 1.522 + // OnDeck allows us to discriminate two cases. If the latter, the 1.523 + // responsibility for progress and succession lies with that other thread. 1.524 + // For good performance, we also depend on the fact that redundant unpark() 1.525 + // operations are cheap. That is, repeated Unpark()ing of the ONDECK thread 1.526 + // is inexpensive. This approach provides implicit futile wakeup throttling. 1.527 + // Note that the referent "w" might be stale with respect to the lock. 1.528 + // In that case the following unpark() is harmless and the worst that'll happen 1.529 + // is a spurious return from a park() operation. Critically, if "w" _is stale, 1.530 + // then progress is known to have occurred as that means the thread associated 1.531 + // with "w" acquired the lock. In that case this thread need take no further 1.532 + // action to guarantee progress. 1.533 + if ((UNS(w) & _LBIT) == 0) w->unpark() ; 1.534 + return ; 1.535 + } 1.536 + 1.537 + intptr_t cxq = _LockWord.FullWord ; 1.538 + if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) { 1.539 + return ; // normal fast-path exit - cxq and EntryList both empty 1.540 + } 1.541 + if (cxq & _LBIT) { 1.542 + // Optional optimization ... 1.543 + // Some other thread acquired the lock in the window since this 1.544 + // thread released it. Succession is now that thread's responsibility. 1.545 + return ; 1.546 + } 1.547 + 1.548 + Succession: 1.549 + // Slow-path exit - this thread must ensure succession and progress. 1.550 + // OnDeck serves as lock to protect cxq and EntryList. 1.551 + // Only the holder of OnDeck can manipulate EntryList or detach the RATs from cxq. 1.552 + // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS) 1.553 + // but only one concurrent consumer (detacher of RATs). 1.554 + // Consider protecting this critical section with schedctl on Solaris. 1.555 + // Unlike a normal lock, however, the exiting thread "locks" OnDeck, 1.556 + // picks a successor and marks that thread as OnDeck. That successor 1.557 + // thread will then clear OnDeck once it eventually acquires the outer lock. 1.558 + if (CASPTR (&_OnDeck, NULL, _LBIT) != UNS(NULL)) { 1.559 + return ; 1.560 + } 1.561 + 1.562 + ParkEvent * List = _EntryList ; 1.563 + if (List != NULL) { 1.564 + // Transfer the head of the EntryList to the OnDeck position. 1.565 + // Once OnDeck, a thread stays OnDeck until it acquires the lock. 1.566 + // For a given lock there is at most OnDeck thread at any one instant. 1.567 + WakeOne: 1.568 + assert (List == _EntryList, "invariant") ; 1.569 + ParkEvent * const w = List ; 1.570 + assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ; 1.571 + _EntryList = w->ListNext ; 1.572 + // as a diagnostic measure consider setting w->_ListNext = BAD 1.573 + assert (UNS(_OnDeck) == _LBIT, "invariant") ; 1.574 + _OnDeck = w ; // pass OnDeck to w. 1.575 + // w will clear OnDeck once it acquires the outer lock 1.576 + 1.577 + // Another optional optimization ... 1.578 + // For heavily contended locks it's not uncommon that some other 1.579 + // thread acquired the lock while this thread was arranging succession. 1.580 + // Try to defer the unpark() operation - Delegate the responsibility 1.581 + // for unpark()ing the OnDeck thread to the current or subsequent owners 1.582 + // That is, the new owner is responsible for unparking the OnDeck thread. 1.583 + OrderAccess::storeload() ; 1.584 + cxq = _LockWord.FullWord ; 1.585 + if (cxq & _LBIT) return ; 1.586 + 1.587 + w->unpark() ; 1.588 + return ; 1.589 + } 1.590 + 1.591 + cxq = _LockWord.FullWord ; 1.592 + if ((cxq & ~_LBIT) != 0) { 1.593 + // The EntryList is empty but the cxq is populated. 1.594 + // drain RATs from cxq into EntryList 1.595 + // Detach RATs segment with CAS and then merge into EntryList 1.596 + for (;;) { 1.597 + // optional optimization - if locked, the owner is responsible for succession 1.598 + if (cxq & _LBIT) goto Punt ; 1.599 + const intptr_t vfy = CASPTR (&_LockWord, cxq, cxq & _LBIT) ; 1.600 + if (vfy == cxq) break ; 1.601 + cxq = vfy ; 1.602 + // Interference - LockWord changed - Just retry 1.603 + // We can see concurrent interference from contending threads 1.604 + // pushing themselves onto the cxq or from lock-unlock operations. 1.605 + // From the perspective of this thread, EntryList is stable and 1.606 + // the cxq is prepend-only -- the head is volatile but the interior 1.607 + // of the cxq is stable. In theory if we encounter interference from threads 1.608 + // pushing onto cxq we could simply break off the original cxq suffix and 1.609 + // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts 1.610 + // on the high-traffic LockWord variable. For instance lets say the cxq is "ABCD" 1.611 + // when we first fetch cxq above. Between the fetch -- where we observed "A" 1.612 + // -- and CAS -- where we attempt to CAS null over A -- "PQR" arrive, 1.613 + // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext 1.614 + // null, leaving cxq = "PQRA" and transfer the "BCD" segment to the EntryList. 1.615 + // Note too, that it's safe for this thread to traverse the cxq 1.616 + // without taking any special concurrency precautions. 1.617 + } 1.618 + 1.619 + // We don't currently reorder the cxq segment as we move it onto 1.620 + // the EntryList, but it might make sense to reverse the order 1.621 + // or perhaps sort by thread priority. See the comments in 1.622 + // synchronizer.cpp objectMonitor::exit(). 1.623 + assert (_EntryList == NULL, "invariant") ; 1.624 + _EntryList = List = (ParkEvent *)(cxq & ~_LBIT) ; 1.625 + assert (List != NULL, "invariant") ; 1.626 + goto WakeOne ; 1.627 + } 1.628 + 1.629 + // cxq|EntryList is empty. 1.630 + // w == NULL implies that cxq|EntryList == NULL in the past. 1.631 + // Possible race - rare inopportune interleaving. 1.632 + // A thread could have added itself to cxq since this thread previously checked. 1.633 + // Detect and recover by refetching cxq. 1.634 + Punt: 1.635 + assert (UNS(_OnDeck) == _LBIT, "invariant") ; 1.636 + _OnDeck = NULL ; // Release inner lock. 1.637 + OrderAccess::storeload(); // Dekker duality - pivot point 1.638 + 1.639 + // Resample LockWord/cxq to recover from possible race. 1.640 + // For instance, while this thread T1 held OnDeck, some other thread T2 might 1.641 + // acquire the outer lock. Another thread T3 might try to acquire the outer 1.642 + // lock, but encounter contention and enqueue itself on cxq. T2 then drops the 1.643 + // outer lock, but skips succession as this thread T1 still holds OnDeck. 1.644 + // T1 is and remains responsible for ensuring succession of T3. 1.645 + // 1.646 + // Note that we don't need to recheck EntryList, just cxq. 1.647 + // If threads moved onto EntryList since we dropped OnDeck 1.648 + // that implies some other thread forced succession. 1.649 + cxq = _LockWord.FullWord ; 1.650 + if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) { 1.651 + goto Succession ; // potential race -- re-run succession 1.652 + } 1.653 + return ; 1.654 +} 1.655 + 1.656 +bool Monitor::notify() { 1.657 + assert (_owner == Thread::current(), "invariant") ; 1.658 + assert (ILocked(), "invariant") ; 1.659 + if (_WaitSet == NULL) return true ; 1.660 + NotifyCount ++ ; 1.661 + 1.662 + // Transfer one thread from the WaitSet to the EntryList or cxq. 1.663 + // Currently we just unlink the head of the WaitSet and prepend to the cxq. 1.664 + // And of course we could just unlink it and unpark it, too, but 1.665 + // in that case it'd likely impale itself on the reentry. 1.666 + Thread::muxAcquire (_WaitLock, "notify:WaitLock") ; 1.667 + ParkEvent * nfy = _WaitSet ; 1.668 + if (nfy != NULL) { // DCL idiom 1.669 + _WaitSet = nfy->ListNext ; 1.670 + assert (nfy->Notified == 0, "invariant") ; 1.671 + // push nfy onto the cxq 1.672 + for (;;) { 1.673 + const intptr_t v = _LockWord.FullWord ; 1.674 + assert ((v & 0xFF) == _LBIT, "invariant") ; 1.675 + nfy->ListNext = (ParkEvent *)(v & ~_LBIT); 1.676 + if (CASPTR (&_LockWord, v, UNS(nfy)|_LBIT) == v) break; 1.677 + // interference - _LockWord changed -- just retry 1.678 + } 1.679 + // Note that setting Notified before pushing nfy onto the cxq is 1.680 + // also legal and safe, but the safety properties are much more 1.681 + // subtle, so for the sake of code stewardship ... 1.682 + OrderAccess::fence() ; 1.683 + nfy->Notified = 1; 1.684 + } 1.685 + Thread::muxRelease (_WaitLock) ; 1.686 + if (nfy != NULL && (NativeMonitorFlags & 16)) { 1.687 + // Experimental code ... light up the wakee in the hope that this thread (the owner) 1.688 + // will drop the lock just about the time the wakee comes ONPROC. 1.689 + nfy->unpark() ; 1.690 + } 1.691 + assert (ILocked(), "invariant") ; 1.692 + return true ; 1.693 +} 1.694 + 1.695 +// Currently notifyAll() transfers the waiters one-at-a-time from the waitset 1.696 +// to the cxq. This could be done more efficiently with a single bulk en-mass transfer, 1.697 +// but in practice notifyAll() for large #s of threads is rare and not time-critical. 1.698 +// Beware too, that we invert the order of the waiters. Lets say that the 1.699 +// waitset is "ABCD" and the cxq is "XYZ". After a notifyAll() the waitset 1.700 +// will be empty and the cxq will be "DCBAXYZ". This is benign, of course. 1.701 + 1.702 +bool Monitor::notify_all() { 1.703 + assert (_owner == Thread::current(), "invariant") ; 1.704 + assert (ILocked(), "invariant") ; 1.705 + while (_WaitSet != NULL) notify() ; 1.706 + return true ; 1.707 +} 1.708 + 1.709 +int Monitor::IWait (Thread * Self, jlong timo) { 1.710 + assert (ILocked(), "invariant") ; 1.711 + 1.712 + // Phases: 1.713 + // 1. Enqueue Self on WaitSet - currently prepend 1.714 + // 2. unlock - drop the outer lock 1.715 + // 3. wait for either notification or timeout 1.716 + // 4. lock - reentry - reacquire the outer lock 1.717 + 1.718 + ParkEvent * const ESelf = Self->_MutexEvent ; 1.719 + ESelf->Notified = 0 ; 1.720 + ESelf->reset() ; 1.721 + OrderAccess::fence() ; 1.722 + 1.723 + // Add Self to WaitSet 1.724 + // Ideally only the holder of the outer lock would manipulate the WaitSet - 1.725 + // That is, the outer lock would implicitly protect the WaitSet. 1.726 + // But if a thread in wait() encounters a timeout it will need to dequeue itself 1.727 + // from the WaitSet _before it becomes the owner of the lock. We need to dequeue 1.728 + // as the ParkEvent -- which serves as a proxy for the thread -- can't reside 1.729 + // on both the WaitSet and the EntryList|cxq at the same time.. That is, a thread 1.730 + // on the WaitSet can't be allowed to compete for the lock until it has managed to 1.731 + // unlink its ParkEvent from WaitSet. Thus the need for WaitLock. 1.732 + // Contention on the WaitLock is minimal. 1.733 + // 1.734 + // Another viable approach would be add another ParkEvent, "WaitEvent" to the 1.735 + // thread class. The WaitSet would be composed of WaitEvents. Only the 1.736 + // owner of the outer lock would manipulate the WaitSet. A thread in wait() 1.737 + // could then compete for the outer lock, and then, if necessary, unlink itself 1.738 + // from the WaitSet only after having acquired the outer lock. More precisely, 1.739 + // there would be no WaitLock. A thread in in wait() would enqueue its WaitEvent 1.740 + // on the WaitSet; release the outer lock; wait for either notification or timeout; 1.741 + // reacquire the inner lock; and then, if needed, unlink itself from the WaitSet. 1.742 + // 1.743 + // Alternatively, a 2nd set of list link fields in the ParkEvent might suffice. 1.744 + // One set would be for the WaitSet and one for the EntryList. 1.745 + // We could also deconstruct the ParkEvent into a "pure" event and add a 1.746 + // new immortal/TSM "ListElement" class that referred to ParkEvents. 1.747 + // In that case we could have one ListElement on the WaitSet and another 1.748 + // on the EntryList, with both referring to the same pure Event. 1.749 + 1.750 + Thread::muxAcquire (_WaitLock, "wait:WaitLock:Add") ; 1.751 + ESelf->ListNext = _WaitSet ; 1.752 + _WaitSet = ESelf ; 1.753 + Thread::muxRelease (_WaitLock) ; 1.754 + 1.755 + // Release the outer lock 1.756 + // We call IUnlock (RelaxAssert=true) as a thread T1 might 1.757 + // enqueue itself on the WaitSet, call IUnlock(), drop the lock, 1.758 + // and then stall before it can attempt to wake a successor. 1.759 + // Some other thread T2 acquires the lock, and calls notify(), moving 1.760 + // T1 from the WaitSet to the cxq. T2 then drops the lock. T1 resumes, 1.761 + // and then finds *itself* on the cxq. During the course of a normal 1.762 + // IUnlock() call a thread should _never find itself on the EntryList 1.763 + // or cxq, but in the case of wait() it's possible. 1.764 + // See synchronizer.cpp objectMonitor::wait(). 1.765 + IUnlock (true) ; 1.766 + 1.767 + // Wait for either notification or timeout 1.768 + // Beware that in some circumstances we might propagate 1.769 + // spurious wakeups back to the caller. 1.770 + 1.771 + for (;;) { 1.772 + if (ESelf->Notified) break ; 1.773 + int err = ParkCommon (ESelf, timo) ; 1.774 + if (err == OS_TIMEOUT || (NativeMonitorFlags & 1)) break ; 1.775 + } 1.776 + 1.777 + // Prepare for reentry - if necessary, remove ESelf from WaitSet 1.778 + // ESelf can be: 1.779 + // 1. Still on the WaitSet. This can happen if we exited the loop by timeout. 1.780 + // 2. On the cxq or EntryList 1.781 + // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position. 1.782 + 1.783 + OrderAccess::fence() ; 1.784 + int WasOnWaitSet = 0 ; 1.785 + if (ESelf->Notified == 0) { 1.786 + Thread::muxAcquire (_WaitLock, "wait:WaitLock:remove") ; 1.787 + if (ESelf->Notified == 0) { // DCL idiom 1.788 + assert (_OnDeck != ESelf, "invariant") ; // can't be both OnDeck and on WaitSet 1.789 + // ESelf is resident on the WaitSet -- unlink it. 1.790 + // A doubly-linked list would be better here so we can unlink in constant-time. 1.791 + // We have to unlink before we potentially recontend as ESelf might otherwise 1.792 + // end up on the cxq|EntryList -- it can't be on two lists at once. 1.793 + ParkEvent * p = _WaitSet ; 1.794 + ParkEvent * q = NULL ; // classic q chases p 1.795 + while (p != NULL && p != ESelf) { 1.796 + q = p ; 1.797 + p = p->ListNext ; 1.798 + } 1.799 + assert (p == ESelf, "invariant") ; 1.800 + if (p == _WaitSet) { // found at head 1.801 + assert (q == NULL, "invariant") ; 1.802 + _WaitSet = p->ListNext ; 1.803 + } else { // found in interior 1.804 + assert (q->ListNext == p, "invariant") ; 1.805 + q->ListNext = p->ListNext ; 1.806 + } 1.807 + WasOnWaitSet = 1 ; // We were *not* notified but instead encountered timeout 1.808 + } 1.809 + Thread::muxRelease (_WaitLock) ; 1.810 + } 1.811 + 1.812 + // Reentry phase - reacquire the lock 1.813 + if (WasOnWaitSet) { 1.814 + // ESelf was previously on the WaitSet but we just unlinked it above 1.815 + // because of a timeout. ESelf is not resident on any list and is not OnDeck 1.816 + assert (_OnDeck != ESelf, "invariant") ; 1.817 + ILock (Self) ; 1.818 + } else { 1.819 + // A prior notify() operation moved ESelf from the WaitSet to the cxq. 1.820 + // ESelf is now on the cxq, EntryList or at the OnDeck position. 1.821 + // The following fragment is extracted from Monitor::ILock() 1.822 + for (;;) { 1.823 + if (_OnDeck == ESelf && TrySpin(Self)) break ; 1.824 + ParkCommon (ESelf, 0) ; 1.825 + } 1.826 + assert (_OnDeck == ESelf, "invariant") ; 1.827 + _OnDeck = NULL ; 1.828 + } 1.829 + 1.830 + assert (ILocked(), "invariant") ; 1.831 + return WasOnWaitSet != 0 ; // return true IFF timeout 1.832 +} 1.833 + 1.834 + 1.835 +// ON THE VMTHREAD SNEAKING PAST HELD LOCKS: 1.836 +// In particular, there are certain types of global lock that may be held 1.837 +// by a Java thread while it is blocked at a safepoint but before it has 1.838 +// written the _owner field. These locks may be sneakily acquired by the 1.839 +// VM thread during a safepoint to avoid deadlocks. Alternatively, one should 1.840 +// identify all such locks, and ensure that Java threads never block at 1.841 +// safepoints while holding them (_no_safepoint_check_flag). While it 1.842 +// seems as though this could increase the time to reach a safepoint 1.843 +// (or at least increase the mean, if not the variance), the latter 1.844 +// approach might make for a cleaner, more maintainable JVM design. 1.845 +// 1.846 +// Sneaking is vile and reprehensible and should be excised at the 1st 1.847 +// opportunity. It's possible that the need for sneaking could be obviated 1.848 +// as follows. Currently, a thread might (a) while TBIVM, call pthread_mutex_lock 1.849 +// or ILock() thus acquiring the "physical" lock underlying Monitor/Mutex. 1.850 +// (b) stall at the TBIVM exit point as a safepoint is in effect. Critically, 1.851 +// it'll stall at the TBIVM reentry state transition after having acquired the 1.852 +// underlying lock, but before having set _owner and having entered the actual 1.853 +// critical section. The lock-sneaking facility leverages that fact and allowed the 1.854 +// VM thread to logically acquire locks that had already be physically locked by mutators 1.855 +// but where mutators were known blocked by the reentry thread state transition. 1.856 +// 1.857 +// If we were to modify the Monitor-Mutex so that TBIVM state transitions tightly 1.858 +// wrapped calls to park(), then we could likely do away with sneaking. We'd 1.859 +// decouple lock acquisition and parking. The critical invariant to eliminating 1.860 +// sneaking is to ensure that we never "physically" acquire the lock while TBIVM. 1.861 +// An easy way to accomplish this is to wrap the park calls in a narrow TBIVM jacket. 1.862 +// One difficulty with this approach is that the TBIVM wrapper could recurse and 1.863 +// call lock() deep from within a lock() call, while the MutexEvent was already enqueued. 1.864 +// Using a stack (N=2 at minimum) of ParkEvents would take care of that problem. 1.865 +// 1.866 +// But of course the proper ultimate approach is to avoid schemes that require explicit 1.867 +// sneaking or dependence on any any clever invariants or subtle implementation properties 1.868 +// of Mutex-Monitor and instead directly address the underlying design flaw. 1.869 + 1.870 +void Monitor::lock (Thread * Self) { 1.871 +#ifdef CHECK_UNHANDLED_OOPS 1.872 + // Clear unhandled oops so we get a crash right away. Only clear for non-vm 1.873 + // or GC threads. 1.874 + if (Self->is_Java_thread()) { 1.875 + Self->clear_unhandled_oops(); 1.876 + } 1.877 +#endif // CHECK_UNHANDLED_OOPS 1.878 + 1.879 + debug_only(check_prelock_state(Self)); 1.880 + assert (_owner != Self , "invariant") ; 1.881 + assert (_OnDeck != Self->_MutexEvent, "invariant") ; 1.882 + 1.883 + if (TryFast()) { 1.884 + Exeunt: 1.885 + assert (ILocked(), "invariant") ; 1.886 + assert (owner() == NULL, "invariant"); 1.887 + set_owner (Self); 1.888 + return ; 1.889 + } 1.890 + 1.891 + // The lock is contended ... 1.892 + 1.893 + bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint(); 1.894 + if (can_sneak && _owner == NULL) { 1.895 + // a java thread has locked the lock but has not entered the 1.896 + // critical region -- let's just pretend we've locked the lock 1.897 + // and go on. we note this with _snuck so we can also 1.898 + // pretend to unlock when the time comes. 1.899 + _snuck = true; 1.900 + goto Exeunt ; 1.901 + } 1.902 + 1.903 + // Try a brief spin to avoid passing thru thread state transition ... 1.904 + if (TrySpin (Self)) goto Exeunt ; 1.905 + 1.906 + check_block_state(Self); 1.907 + if (Self->is_Java_thread()) { 1.908 + // Horribile dictu - we suffer through a state transition 1.909 + assert(rank() > Mutex::special, "Potential deadlock with special or lesser rank mutex"); 1.910 + ThreadBlockInVM tbivm ((JavaThread *) Self) ; 1.911 + ILock (Self) ; 1.912 + } else { 1.913 + // Mirabile dictu 1.914 + ILock (Self) ; 1.915 + } 1.916 + goto Exeunt ; 1.917 +} 1.918 + 1.919 +void Monitor::lock() { 1.920 + this->lock(Thread::current()); 1.921 +} 1.922 + 1.923 +// Lock without safepoint check - a degenerate variant of lock(). 1.924 +// Should ONLY be used by safepoint code and other code 1.925 +// that is guaranteed not to block while running inside the VM. If this is called with 1.926 +// thread state set to be in VM, the safepoint synchronization code will deadlock! 1.927 + 1.928 +void Monitor::lock_without_safepoint_check (Thread * Self) { 1.929 + assert (_owner != Self, "invariant") ; 1.930 + ILock (Self) ; 1.931 + assert (_owner == NULL, "invariant"); 1.932 + set_owner (Self); 1.933 +} 1.934 + 1.935 +void Monitor::lock_without_safepoint_check () { 1.936 + lock_without_safepoint_check (Thread::current()) ; 1.937 +} 1.938 + 1.939 + 1.940 +// Returns true if thread succeceed [sic] in grabbing the lock, otherwise false. 1.941 + 1.942 +bool Monitor::try_lock() { 1.943 + Thread * const Self = Thread::current(); 1.944 + debug_only(check_prelock_state(Self)); 1.945 + // assert(!thread->is_inside_signal_handler(), "don't lock inside signal handler"); 1.946 + 1.947 + // Special case, where all Java threads are stopped. 1.948 + // The lock may have been acquired but _owner is not yet set. 1.949 + // In that case the VM thread can safely grab the lock. 1.950 + // It strikes me this should appear _after the TryLock() fails, below. 1.951 + bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint(); 1.952 + if (can_sneak && _owner == NULL) { 1.953 + set_owner(Self); // Do not need to be atomic, since we are at a safepoint 1.954 + _snuck = true; 1.955 + return true; 1.956 + } 1.957 + 1.958 + if (TryLock()) { 1.959 + // We got the lock 1.960 + assert (_owner == NULL, "invariant"); 1.961 + set_owner (Self); 1.962 + return true; 1.963 + } 1.964 + return false; 1.965 +} 1.966 + 1.967 +void Monitor::unlock() { 1.968 + assert (_owner == Thread::current(), "invariant") ; 1.969 + assert (_OnDeck != Thread::current()->_MutexEvent , "invariant") ; 1.970 + set_owner (NULL) ; 1.971 + if (_snuck) { 1.972 + assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak"); 1.973 + _snuck = false; 1.974 + return ; 1.975 + } 1.976 + IUnlock (false) ; 1.977 +} 1.978 + 1.979 +// Yet another degenerate version of Monitor::lock() or lock_without_safepoint_check() 1.980 +// jvm_raw_lock() and _unlock() can be called by non-Java threads via JVM_RawMonitorEnter. 1.981 +// 1.982 +// There's no expectation that JVM_RawMonitors will interoperate properly with the native 1.983 +// Mutex-Monitor constructs. We happen to implement JVM_RawMonitors in terms of 1.984 +// native Mutex-Monitors simply as a matter of convenience. A simple abstraction layer 1.985 +// over a pthread_mutex_t would work equally as well, but require more platform-specific 1.986 +// code -- a "PlatformMutex". Alternatively, a simply layer over muxAcquire-muxRelease 1.987 +// would work too. 1.988 +// 1.989 +// Since the caller might be a foreign thread, we don't necessarily have a Thread.MutexEvent 1.990 +// instance available. Instead, we transiently allocate a ParkEvent on-demand if 1.991 +// we encounter contention. That ParkEvent remains associated with the thread 1.992 +// until it manages to acquire the lock, at which time we return the ParkEvent 1.993 +// to the global ParkEvent free list. This is correct and suffices for our purposes. 1.994 +// 1.995 +// Beware that the original jvm_raw_unlock() had a "_snuck" test but that 1.996 +// jvm_raw_lock() didn't have the corresponding test. I suspect that's an 1.997 +// oversight, but I've replicated the original suspect logic in the new code ... 1.998 + 1.999 +void Monitor::jvm_raw_lock() { 1.1000 + assert(rank() == native, "invariant"); 1.1001 + 1.1002 + if (TryLock()) { 1.1003 + Exeunt: 1.1004 + assert (ILocked(), "invariant") ; 1.1005 + assert (_owner == NULL, "invariant"); 1.1006 + // This can potentially be called by non-java Threads. Thus, the ThreadLocalStorage 1.1007 + // might return NULL. Don't call set_owner since it will break on an NULL owner 1.1008 + // Consider installing a non-null "ANON" distinguished value instead of just NULL. 1.1009 + _owner = ThreadLocalStorage::thread(); 1.1010 + return ; 1.1011 + } 1.1012 + 1.1013 + if (TrySpin(NULL)) goto Exeunt ; 1.1014 + 1.1015 + // slow-path - apparent contention 1.1016 + // Allocate a ParkEvent for transient use. 1.1017 + // The ParkEvent remains associated with this thread until 1.1018 + // the time the thread manages to acquire the lock. 1.1019 + ParkEvent * const ESelf = ParkEvent::Allocate(NULL) ; 1.1020 + ESelf->reset() ; 1.1021 + OrderAccess::storeload() ; 1.1022 + 1.1023 + // Either Enqueue Self on cxq or acquire the outer lock. 1.1024 + if (AcquireOrPush (ESelf)) { 1.1025 + ParkEvent::Release (ESelf) ; // surrender the ParkEvent 1.1026 + goto Exeunt ; 1.1027 + } 1.1028 + 1.1029 + // At any given time there is at most one ondeck thread. 1.1030 + // ondeck implies not resident on cxq and not resident on EntryList 1.1031 + // Only the OnDeck thread can try to acquire -- contended for -- the lock. 1.1032 + // CONSIDER: use Self->OnDeck instead of m->OnDeck. 1.1033 + for (;;) { 1.1034 + if (_OnDeck == ESelf && TrySpin(NULL)) break ; 1.1035 + ParkCommon (ESelf, 0) ; 1.1036 + } 1.1037 + 1.1038 + assert (_OnDeck == ESelf, "invariant") ; 1.1039 + _OnDeck = NULL ; 1.1040 + ParkEvent::Release (ESelf) ; // surrender the ParkEvent 1.1041 + goto Exeunt ; 1.1042 +} 1.1043 + 1.1044 +void Monitor::jvm_raw_unlock() { 1.1045 + // Nearly the same as Monitor::unlock() ... 1.1046 + // directly set _owner instead of using set_owner(null) 1.1047 + _owner = NULL ; 1.1048 + if (_snuck) { // ??? 1.1049 + assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak"); 1.1050 + _snuck = false; 1.1051 + return ; 1.1052 + } 1.1053 + IUnlock(false) ; 1.1054 +} 1.1055 + 1.1056 +bool Monitor::wait(bool no_safepoint_check, long timeout, bool as_suspend_equivalent) { 1.1057 + Thread * const Self = Thread::current() ; 1.1058 + assert (_owner == Self, "invariant") ; 1.1059 + assert (ILocked(), "invariant") ; 1.1060 + 1.1061 + // as_suspend_equivalent logically implies !no_safepoint_check 1.1062 + guarantee (!as_suspend_equivalent || !no_safepoint_check, "invariant") ; 1.1063 + // !no_safepoint_check logically implies java_thread 1.1064 + guarantee (no_safepoint_check || Self->is_Java_thread(), "invariant") ; 1.1065 + 1.1066 + #ifdef ASSERT 1.1067 + Monitor * least = get_least_ranked_lock_besides_this(Self->owned_locks()); 1.1068 + assert(least != this, "Specification of get_least_... call above"); 1.1069 + if (least != NULL && least->rank() <= special) { 1.1070 + tty->print("Attempting to wait on monitor %s/%d while holding" 1.1071 + " lock %s/%d -- possible deadlock", 1.1072 + name(), rank(), least->name(), least->rank()); 1.1073 + assert(false, "Shouldn't block(wait) while holding a lock of rank special"); 1.1074 + } 1.1075 + #endif // ASSERT 1.1076 + 1.1077 + int wait_status ; 1.1078 + // conceptually set the owner to NULL in anticipation of 1.1079 + // abdicating the lock in wait 1.1080 + set_owner(NULL); 1.1081 + if (no_safepoint_check) { 1.1082 + wait_status = IWait (Self, timeout) ; 1.1083 + } else { 1.1084 + assert (Self->is_Java_thread(), "invariant") ; 1.1085 + JavaThread *jt = (JavaThread *)Self; 1.1086 + 1.1087 + // Enter safepoint region - ornate and Rococo ... 1.1088 + ThreadBlockInVM tbivm(jt); 1.1089 + OSThreadWaitState osts(Self->osthread(), false /* not Object.wait() */); 1.1090 + 1.1091 + if (as_suspend_equivalent) { 1.1092 + jt->set_suspend_equivalent(); 1.1093 + // cleared by handle_special_suspend_equivalent_condition() or 1.1094 + // java_suspend_self() 1.1095 + } 1.1096 + 1.1097 + wait_status = IWait (Self, timeout) ; 1.1098 + 1.1099 + // were we externally suspended while we were waiting? 1.1100 + if (as_suspend_equivalent && jt->handle_special_suspend_equivalent_condition()) { 1.1101 + // Our event wait has finished and we own the lock, but 1.1102 + // while we were waiting another thread suspended us. We don't 1.1103 + // want to hold the lock while suspended because that 1.1104 + // would surprise the thread that suspended us. 1.1105 + assert (ILocked(), "invariant") ; 1.1106 + IUnlock (true) ; 1.1107 + jt->java_suspend_self(); 1.1108 + ILock (Self) ; 1.1109 + assert (ILocked(), "invariant") ; 1.1110 + } 1.1111 + } 1.1112 + 1.1113 + // Conceptually reestablish ownership of the lock. 1.1114 + // The "real" lock -- the LockByte -- was reacquired by IWait(). 1.1115 + assert (ILocked(), "invariant") ; 1.1116 + assert (_owner == NULL, "invariant") ; 1.1117 + set_owner (Self) ; 1.1118 + return wait_status != 0 ; // return true IFF timeout 1.1119 +} 1.1120 + 1.1121 +Monitor::~Monitor() { 1.1122 + assert ((UNS(_owner)|UNS(_LockWord.FullWord)|UNS(_EntryList)|UNS(_WaitSet)|UNS(_OnDeck)) == 0, "") ; 1.1123 +} 1.1124 + 1.1125 +void Monitor::ClearMonitor (Monitor * m) { 1.1126 + m->_owner = NULL ; 1.1127 + m->_snuck = false ; 1.1128 + m->_name = "UNKNOWN" ; 1.1129 + m->_LockWord.FullWord = 0 ; 1.1130 + m->_EntryList = NULL ; 1.1131 + m->_OnDeck = NULL ; 1.1132 + m->_WaitSet = NULL ; 1.1133 + m->_WaitLock[0] = 0 ; 1.1134 +} 1.1135 + 1.1136 +Monitor::Monitor() { ClearMonitor(this); } 1.1137 + 1.1138 +Monitor::Monitor (int Rank, const char * name, bool allow_vm_block) { 1.1139 + ClearMonitor (this) ; 1.1140 +#ifdef ASSERT 1.1141 + _allow_vm_block = allow_vm_block; 1.1142 + _rank = Rank ; 1.1143 +#endif 1.1144 +} 1.1145 + 1.1146 +Mutex::~Mutex() { 1.1147 + assert ((UNS(_owner)|UNS(_LockWord.FullWord)|UNS(_EntryList)|UNS(_WaitSet)|UNS(_OnDeck)) == 0, "") ; 1.1148 +} 1.1149 + 1.1150 +Mutex::Mutex (int Rank, const char * name, bool allow_vm_block) { 1.1151 + ClearMonitor ((Monitor *) this) ; 1.1152 +#ifdef ASSERT 1.1153 + _allow_vm_block = allow_vm_block; 1.1154 + _rank = Rank ; 1.1155 +#endif 1.1156 +} 1.1157 + 1.1158 +bool Monitor::owned_by_self() const { 1.1159 + bool ret = _owner == Thread::current(); 1.1160 + assert (!ret || _LockWord.Bytes[_LSBINDEX] != 0, "invariant") ; 1.1161 + return ret; 1.1162 +} 1.1163 + 1.1164 +void Monitor::print_on_error(outputStream* st) const { 1.1165 + st->print("[" PTR_FORMAT, this); 1.1166 + st->print("] %s", _name); 1.1167 + st->print(" - owner thread: " PTR_FORMAT, _owner); 1.1168 +} 1.1169 + 1.1170 + 1.1171 + 1.1172 + 1.1173 +// ---------------------------------------------------------------------------------- 1.1174 +// Non-product code 1.1175 + 1.1176 +#ifndef PRODUCT 1.1177 +void Monitor::print_on(outputStream* st) const { 1.1178 + st->print_cr("Mutex: [0x%lx/0x%lx] %s - owner: 0x%lx", this, _LockWord.FullWord, _name, _owner); 1.1179 +} 1.1180 +#endif 1.1181 + 1.1182 +#ifndef PRODUCT 1.1183 +#ifdef ASSERT 1.1184 +Monitor * Monitor::get_least_ranked_lock(Monitor * locks) { 1.1185 + Monitor *res, *tmp; 1.1186 + for (res = tmp = locks; tmp != NULL; tmp = tmp->next()) { 1.1187 + if (tmp->rank() < res->rank()) { 1.1188 + res = tmp; 1.1189 + } 1.1190 + } 1.1191 + if (!SafepointSynchronize::is_at_safepoint()) { 1.1192 + // In this case, we expect the held locks to be 1.1193 + // in increasing rank order (modulo any native ranks) 1.1194 + for (tmp = locks; tmp != NULL; tmp = tmp->next()) { 1.1195 + if (tmp->next() != NULL) { 1.1196 + assert(tmp->rank() == Mutex::native || 1.1197 + tmp->rank() <= tmp->next()->rank(), "mutex rank anomaly?"); 1.1198 + } 1.1199 + } 1.1200 + } 1.1201 + return res; 1.1202 +} 1.1203 + 1.1204 +Monitor* Monitor::get_least_ranked_lock_besides_this(Monitor* locks) { 1.1205 + Monitor *res, *tmp; 1.1206 + for (res = NULL, tmp = locks; tmp != NULL; tmp = tmp->next()) { 1.1207 + if (tmp != this && (res == NULL || tmp->rank() < res->rank())) { 1.1208 + res = tmp; 1.1209 + } 1.1210 + } 1.1211 + if (!SafepointSynchronize::is_at_safepoint()) { 1.1212 + // In this case, we expect the held locks to be 1.1213 + // in increasing rank order (modulo any native ranks) 1.1214 + for (tmp = locks; tmp != NULL; tmp = tmp->next()) { 1.1215 + if (tmp->next() != NULL) { 1.1216 + assert(tmp->rank() == Mutex::native || 1.1217 + tmp->rank() <= tmp->next()->rank(), "mutex rank anomaly?"); 1.1218 + } 1.1219 + } 1.1220 + } 1.1221 + return res; 1.1222 +} 1.1223 + 1.1224 + 1.1225 +bool Monitor::contains(Monitor* locks, Monitor * lock) { 1.1226 + for (; locks != NULL; locks = locks->next()) { 1.1227 + if (locks == lock) 1.1228 + return true; 1.1229 + } 1.1230 + return false; 1.1231 +} 1.1232 +#endif 1.1233 + 1.1234 +// Called immediately after lock acquisition or release as a diagnostic 1.1235 +// to track the lock-set of the thread and test for rank violations that 1.1236 +// might indicate exposure to deadlock. 1.1237 +// Rather like an EventListener for _owner (:>). 1.1238 + 1.1239 +void Monitor::set_owner_implementation(Thread *new_owner) { 1.1240 + // This function is solely responsible for maintaining 1.1241 + // and checking the invariant that threads and locks 1.1242 + // are in a 1/N relation, with some some locks unowned. 1.1243 + // It uses the Mutex::_owner, Mutex::_next, and 1.1244 + // Thread::_owned_locks fields, and no other function 1.1245 + // changes those fields. 1.1246 + // It is illegal to set the mutex from one non-NULL 1.1247 + // owner to another--it must be owned by NULL as an 1.1248 + // intermediate state. 1.1249 + 1.1250 + if (new_owner != NULL) { 1.1251 + // the thread is acquiring this lock 1.1252 + 1.1253 + assert(new_owner == Thread::current(), "Should I be doing this?"); 1.1254 + assert(_owner == NULL, "setting the owner thread of an already owned mutex"); 1.1255 + _owner = new_owner; // set the owner 1.1256 + 1.1257 + // link "this" into the owned locks list 1.1258 + 1.1259 + #ifdef ASSERT // Thread::_owned_locks is under the same ifdef 1.1260 + Monitor* locks = get_least_ranked_lock(new_owner->owned_locks()); 1.1261 + // Mutex::set_owner_implementation is a friend of Thread 1.1262 + 1.1263 + assert(this->rank() >= 0, "bad lock rank"); 1.1264 + 1.1265 + if (LogMultipleMutexLocking && locks != NULL) { 1.1266 + Events::log("thread " INTPTR_FORMAT " locks %s, already owns %s", new_owner, name(), locks->name()); 1.1267 + } 1.1268 + 1.1269 + // Deadlock avoidance rules require us to acquire Mutexes only in 1.1270 + // a global total order. For example m1 is the lowest ranked mutex 1.1271 + // that the thread holds and m2 is the mutex the thread is trying 1.1272 + // to acquire, then deadlock avoidance rules require that the rank 1.1273 + // of m2 be less than the rank of m1. 1.1274 + // The rank Mutex::native is an exception in that it is not subject 1.1275 + // to the verification rules. 1.1276 + // Here are some further notes relating to mutex acquisition anomalies: 1.1277 + // . under Solaris, the interrupt lock gets acquired when doing 1.1278 + // profiling, so any lock could be held. 1.1279 + // . it is also ok to acquire Safepoint_lock at the very end while we 1.1280 + // already hold Terminator_lock - may happen because of periodic safepoints 1.1281 + if (this->rank() != Mutex::native && 1.1282 + this->rank() != Mutex::suspend_resume && 1.1283 + locks != NULL && locks->rank() <= this->rank() && 1.1284 + !SafepointSynchronize::is_at_safepoint() && 1.1285 + this != Interrupt_lock && this != ProfileVM_lock && 1.1286 + !(this == Safepoint_lock && contains(locks, Terminator_lock) && 1.1287 + SafepointSynchronize::is_synchronizing())) { 1.1288 + new_owner->print_owned_locks(); 1.1289 + fatal4("acquiring lock %s/%d out of order with lock %s/%d -- possible deadlock", 1.1290 + this->name(), this->rank(), locks->name(), locks->rank()); 1.1291 + } 1.1292 + 1.1293 + this->_next = new_owner->_owned_locks; 1.1294 + new_owner->_owned_locks = this; 1.1295 + #endif 1.1296 + 1.1297 + } else { 1.1298 + // the thread is releasing this lock 1.1299 + 1.1300 + Thread* old_owner = _owner; 1.1301 + debug_only(_last_owner = old_owner); 1.1302 + 1.1303 + assert(old_owner != NULL, "removing the owner thread of an unowned mutex"); 1.1304 + assert(old_owner == Thread::current(), "removing the owner thread of an unowned mutex"); 1.1305 + 1.1306 + _owner = NULL; // set the owner 1.1307 + 1.1308 + #ifdef ASSERT 1.1309 + Monitor *locks = old_owner->owned_locks(); 1.1310 + 1.1311 + if (LogMultipleMutexLocking && locks != this) { 1.1312 + Events::log("thread " INTPTR_FORMAT " unlocks %s, still owns %s", old_owner, this->name(), locks->name()); 1.1313 + } 1.1314 + 1.1315 + // remove "this" from the owned locks list 1.1316 + 1.1317 + Monitor *prev = NULL; 1.1318 + bool found = false; 1.1319 + for (; locks != NULL; prev = locks, locks = locks->next()) { 1.1320 + if (locks == this) { 1.1321 + found = true; 1.1322 + break; 1.1323 + } 1.1324 + } 1.1325 + assert(found, "Removing a lock not owned"); 1.1326 + if (prev == NULL) { 1.1327 + old_owner->_owned_locks = _next; 1.1328 + } else { 1.1329 + prev->_next = _next; 1.1330 + } 1.1331 + _next = NULL; 1.1332 + #endif 1.1333 + } 1.1334 +} 1.1335 + 1.1336 + 1.1337 +// Factored out common sanity checks for locking mutex'es. Used by lock() and try_lock() 1.1338 +void Monitor::check_prelock_state(Thread *thread) { 1.1339 + assert((!thread->is_Java_thread() || ((JavaThread *)thread)->thread_state() == _thread_in_vm) 1.1340 + || rank() == Mutex::special, "wrong thread state for using locks"); 1.1341 + if (StrictSafepointChecks) { 1.1342 + if (thread->is_VM_thread() && !allow_vm_block()) { 1.1343 + fatal1("VM thread using lock %s (not allowed to block on)", name()); 1.1344 + } 1.1345 + debug_only(if (rank() != Mutex::special) \ 1.1346 + thread->check_for_valid_safepoint_state(false);) 1.1347 + } 1.1348 +} 1.1349 + 1.1350 +void Monitor::check_block_state(Thread *thread) { 1.1351 + if (!_allow_vm_block && thread->is_VM_thread()) { 1.1352 + warning("VM thread blocked on lock"); 1.1353 + print(); 1.1354 + BREAKPOINT; 1.1355 + } 1.1356 + assert(_owner != thread, "deadlock: blocking on monitor owned by current thread"); 1.1357 +} 1.1358 + 1.1359 +#endif // PRODUCT