      current->print_on_error(st, buf, buflen);
      st->cr();
    }
  }

// Lifecycle management for TSM ParkEvents.
// ParkEvents are type-stable (TSM).
// In our particular implementation they happen to be immortal.
//
// We manage concurrency on the FreeList with a CAS-based
// detach-modify-reattach idiom that avoids the ABA problems
// that would otherwise be present in a simple CAS-based
// push-pop implementation.  (push-one and pop-all)
//
// Caveat: Allocate() and Release() may be called from threads
// other than the thread associated with the Event!
// If we need to call Allocate() when running as the thread in
// question then look for the PD calls to initialize native TLS.
// Native TLS (Win32/Linux/Solaris) can only be initialized or
// accessed by the associated thread.
// See also pd_initialize().
//
// Note that we could defer associating a ParkEvent with a thread
// until the 1st time the thread calls park().  unpark() calls to
// an unprovisioned thread would be ignored.  The first park() call
// for a thread would allocate and associate a ParkEvent and return
// immediately.
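
// Editor's sketch (illustrative only -- not part of this file; the "sketch_"
// names are hypothetical): the push-one / pop-all shape of the idiom, using
// C++11 std::atomic.  Detaching the entire list with a single exchange()
// privatizes it, so only one detacher ever operates on a given snapshot and
// the ABA hazard of a CAS-based pop-one cannot arise.

#include <atomic>

struct SketchNode { SketchNode * next; };
static std::atomic<SketchNode *> sketch_free_list(NULL);

static SketchNode * sketch_allocate() {
  // 1: Detach -- privatize the whole list in one step.
  SketchNode * head = sketch_free_list.exchange(NULL);
  if (head == NULL) return NULL;     // empty: caller materializes a new node
  // 2: Extract -- pop the 1st element from the now-private list.
  SketchNode * residual = head->next;
  // 3: Try to reattach the residual list; on interference, 4: detach the
  // recent arrivals, merge them behind the residual list, and retry.
  while (residual != NULL) {
    SketchNode * expected = NULL;
    if (sketch_free_list.compare_exchange_strong(expected, residual)) break;
    SketchNode * arrivals = sketch_free_list.exchange(NULL);
    if (arrivals != NULL) {
      SketchNode * tail = residual;
      while (tail->next != NULL) tail = tail->next;
      tail->next = arrivals;
    }
  }
  return head;
}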

volatile int ParkEvent::ListLock = 0 ;
ParkEvent * volatile ParkEvent::FreeList = NULL ;

ParkEvent * ParkEvent::Allocate (Thread * t) {
  // In rare cases -- JVM_RawMonitor* operations -- we can find t == null.
  ParkEvent * ev ;

  // Start by trying to recycle an existing but unassociated
  // ParkEvent from the global free list.
  for (;;) {
    ev = FreeList ;
    if (ev == NULL) break ;
    // 1: Detach - sequester or privatize the list
    // Tantamount to ev = Swap (&FreeList, NULL)
    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
      continue ;
    }

    // We've detached the list.  The list in-hand is now
    // local to this thread.  This thread can operate on the
    // list without risk of interference from other threads.
    // 2: Extract -- pop the 1st element from the list.
    ParkEvent * List = ev->FreeNext ;
    if (List == NULL) break ;
    for (;;) {
      // 3: Try to reattach the residual list
      guarantee (List != NULL, "invariant") ;
      ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
      if (Arv == NULL) break ;

      // New nodes arrived.  Try to detach the recent arrivals.
      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
        continue ;
      }
      guarantee (Arv != NULL, "invariant") ;
      // 4: Merge Arv into List
      ParkEvent * Tail = List ;
      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
      Tail->FreeNext = Arv ;
    }
    break ;
  }

  if (ev != NULL) {
    guarantee (ev->AssociatedWith == NULL, "invariant") ;
  } else {
    // Do this the hard way -- materialize a new ParkEvent.
    // In rare cases an allocating thread might detach a long list --
    // installing null into FreeList -- and then stall or be obstructed.
    // A 2nd thread calling Allocate() would see FreeList == null.
    // The list held privately by the 1st thread is unavailable to the 2nd thread.
    // In that case the 2nd thread would have to materialize a new ParkEvent,
    // even though free ParkEvents existed in the system.  In this case we end up
    // with more ParkEvents in circulation than we need, but the race is
    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
    // is equal to the maximum # of threads that existed at any one time.
    // Because of the race mentioned above, segments of the freelist
    // can be transiently inaccessible.  At worst we may end up with the
    // # of ParkEvents in circulation slightly above the ideal.
    // Note that if we didn't have the TSM/immortal constraint, then
    // when reattaching, above, we could trim the list.
    ev = new ParkEvent () ;
    guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
  }
  ev->reset() ;                     // courtesy to caller
  ev->AssociatedWith = t ;          // Associate ev with t
  ev->FreeNext = NULL ;
  return ev ;
}

void ParkEvent::Release (ParkEvent * ev) {
  if (ev == NULL) return ;
  guarantee (ev->FreeNext == NULL, "invariant") ;
  ev->AssociatedWith = NULL ;
  for (;;) {
    // Push ev onto FreeList
    // The mechanism is "half" lock-free.
    ParkEvent * List = FreeList ;
    ev->FreeNext = List ;
    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
  }
}
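
// Editor's sketch (illustrative only, hypothetical names, continuing the
// sketch above): the push side of the idiom, matching Release().  Pushing is
// an ordinary lock-free CAS loop; it tolerates concurrent pushers and the
// single detacher, which is why the overall mechanism is "half" lock-free.

static void sketch_release(SketchNode * n) {
  SketchNode * head = sketch_free_list.load();
  do {
    n->next = head;            // link the node in front of the current head
  } while (!sketch_free_list.compare_exchange_weak(head, n));
}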

// Override operator new and delete so we can ensure that the
// least significant byte of ParkEvent addresses is 0.
// Beware that excessive address alignment is undesirable
// as it can result in D$ index usage imbalance as
// well as bank access imbalance on Niagara-like platforms,
// although Niagara's hash function should help.
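
// Editor's sketch (illustrative only, hypothetical names; error handling
// elided): the over-allocate-and-round trick used by operator new below.
// Allocating 256 spare bytes and masking with -256 (i.e. ~255) yields an
// address whose least significant byte is zero.  The raw pointer is not
// retained, so such a block can never be freed -- tolerable here only
// because ParkEvents are immortal.

#include <stdint.h>
#include <stdlib.h>

static void * sketch_alloc_256_aligned(size_t sz) {
  intptr_t raw = (intptr_t) malloc(sz + 256);    // slack for rounding up
  return (void *) ((raw + 256) & -256);          // low 8 bits now zero
}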
void * ParkEvent::operator new (size_t sz) {
  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
}

void ParkEvent::operator delete (void * a) {
  // ParkEvents are type-stable and immortal ...
  ShouldNotReachHere();
}
|

// 6399321 As a temporary measure we copied & modified the ParkEvent::
// allocate() and release() code for use by Parkers.  The Parker:: forms
// will eventually be removed as we consolidate and shift over to ParkEvents
// for both builtin synchronization and JSR166 operations.
|
volatile int Parker::ListLock = 0 ;
Parker * volatile Parker::FreeList = NULL ;

Parker * Parker::Allocate (JavaThread * t) {
  guarantee (t != NULL, "invariant") ;
  Parker * p ;

  // Start by trying to recycle an existing but unassociated
  // Parker from the global free list.
  for (;;) {
    p = FreeList ;
    if (p == NULL) break ;
    // 1: Detach
    // Tantamount to p = Swap (&FreeList, NULL)
    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
      continue ;
    }

    // We've detached the list.  The list in-hand is now
    // local to this thread.  This thread can operate on the
    // list without risk of interference from other threads.
    // 2: Extract -- pop the 1st element from the list.
    Parker * List = p->FreeNext ;
    if (List == NULL) break ;
    for (;;) {
      // 3: Try to reattach the residual list
      guarantee (List != NULL, "invariant") ;
      Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
      if (Arv == NULL) break ;

      // New nodes arrived.  Try to detach the recent arrivals.
      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
        continue ;
      }
      guarantee (Arv != NULL, "invariant") ;
      // 4: Merge Arv into List
      Parker * Tail = List ;
      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
      Tail->FreeNext = Arv ;
    }
    break ;
  }

  if (p != NULL) {
    guarantee (p->AssociatedWith == NULL, "invariant") ;
  } else {
    // Do this the hard way -- materialize a new Parker.
    // In rare cases an allocating thread might detach
    // a long list -- installing null into FreeList -- and
    // then stall.  Another thread calling Allocate() would see
    // FreeList == null and then invoke the ctor.  In this case we
    // end up with more Parkers in circulation than we need, but
    // the race is rare and the outcome is benign.
    // Ideally, the # of extant Parkers is equal to the
    // maximum # of threads that existed at any one time.
    // Because of the race mentioned above, segments of the
    // freelist can be transiently inaccessible.  At worst
    // we may end up with the # of Parkers in circulation
    // slightly above the ideal.
    p = new Parker() ;
  }
  p->AssociatedWith = t ;          // Associate p with t
  p->FreeNext = NULL ;
  return p ;
}

void Parker::Release (Parker * p) {
  if (p == NULL) return ;
  guarantee (p->AssociatedWith != NULL, "invariant") ;
  guarantee (p->FreeNext == NULL, "invariant") ;
  p->AssociatedWith = NULL ;
  for (;;) {
    // Push p onto FreeList
    Parker * List = FreeList ;
    p->FreeNext = List ;
    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
  }
}


// Internal SpinLock and Mutex
// Based on ParkEvent

// Ad-hoc mutual exclusion primitives: SpinLock and Mux
//
// We employ SpinLocks _only for low-contention, fixed-length
// short-duration critical sections where we're concerned
// about native mutex_t or HotSpot Mutex:: latency.
// The mux construct provides a spin-then-block mutual exclusion
// mechanism.
//
// Testing has shown that contention on the ListLock guarding gFreeList
// is common.  If we implement ListLock as a simple SpinLock it's common
// for the JVM to devolve to yielding with little progress.  This is true
// despite the fact that the critical sections protected by ListLock are
// extremely short.
//
// TODO-FIXME: ListLock should be of type SpinLock.
// We should make this a 1st-class type, integrated into the lock
// hierarchy as leaf-locks.  Critically, the SpinLock structure
// should have sufficient padding to avoid false-sharing and excessive
// cache-coherency traffic.

typedef volatile int SpinLockT ;

void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
  if (Atomic::cmpxchg (1, adr, 0) == 0) {
    return ;   // normal fast-path return
  }

  // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
  TEVENT (SpinAcquire - ctx) ;
  int ctr = 0 ;
  int Yields = 0 ;
  for (;;) {
    while (*adr != 0) {
      ++ctr ;
      if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
        if (Yields > 5) {
          // Consider using a simple NakedSleep() instead.
          // Then SpinAcquire could be called by non-JVM threads.
          Thread::current()->_ParkEvent->park(1) ;
        } else {
          os::NakedYield() ;
          ++Yields ;
        }
      } else {
        SpinPause() ;
      }
    }
    if (Atomic::cmpxchg (1, adr, 0) == 0) return ;
  }
}

void Thread::SpinRelease (volatile int * adr) {
  assert (*adr != 0, "invariant") ;
  OrderAccess::fence() ;      // guarantee at least release consistency.
  // Roach-motel semantics.
  // It's safe if subsequent LDs and STs float "up" into the critical section,
  // but prior LDs and STs within the critical section can't be allowed
  // to reorder or float past the ST that releases the lock.
  *adr = 0 ;
}
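
// Editor's sketch (illustrative only, hypothetical names): the roach-motel
// constraint above expressed with C++11 atomics.  Acquire uses an acquiring
// exchange; release uses a releasing store, which lets later accesses float
// up into the critical section but keeps prior critical-section accesses
// from sinking past the unlock.

#include <atomic>

static void sketch_spin_acquire(std::atomic<int> * lock) {
  while (lock->exchange(1, std::memory_order_acquire) != 0) {
    ;  // spin; a production version would pause, yield, then park as above
  }
}

static void sketch_spin_release(std::atomic<int> * lock) {
  lock->store(0, std::memory_order_release);  // prior LDs/STs can't sink below
}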
|
// muxAcquire and muxRelease:
//
// *  muxAcquire and muxRelease support a single-word lock-word construct.
//    The LSB of the word is set IFF the lock is held.
//    The remainder of the word points to the head of a singly-linked list
//    of threads blocked on the lock.
//
// *  The current implementation of muxAcquire-muxRelease uses its own
//    dedicated Thread._MuxEvent instance.  If we're interested in
//    minimizing the peak number of extant ParkEvent instances then
//    we could eliminate _MuxEvent and "borrow" _ParkEvent as long
//    as certain invariants were satisfied.  Specifically, care would need
//    to be taken with regards to consuming unpark() "permits".
//    A safe rule of thumb is that a thread would never call muxAcquire()
//    if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
//    park().  Otherwise the _ParkEvent park() operation in muxAcquire() could
//    consume an unpark() permit intended for monitorenter, for instance.
//    One way around this would be to widen the restricted-range semaphore
//    implemented in park().  Another alternative would be to provide
//    multiple instances of the PlatformEvent() for each thread.  One
//    instance would be dedicated to muxAcquire-muxRelease, for instance.
//
// *  Usage:
//    -- Only as leaf locks
//    -- for short-term locking only, as muxAcquire does not perform
//       thread state transitions.
//
// Alternatives:
// *  We could implement muxAcquire and muxRelease with MCS or CLH locks
//    but with parking or spin-then-park instead of pure spinning.
// *  Use Taura-Oyama-Yonezawa locks.
// *  It's possible to construct a 1-0 lock if we encode the lockword as
//    (List,LockByte).  Acquire will CAS the full lockword while Release
//    will STB 0 into the LockByte.  The 1-0 scheme admits stranding, so
//    acquiring threads use timers (ParkTimed) to detect and recover from
//    the stranding window.  Thread/Node structures must be aligned on 256-byte
//    boundaries by using placement-new.
// *  Augment MCS with advisory back-link fields maintained with CAS().
//    Pictorially:  LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
//    The validity of the backlinks must be ratified before we trust the value.
//    If the backlinks are invalid the exiting thread must back-track through
//    the forward links, which are always trustworthy.
// *  Add a successor indication.  The LockWord is currently encoded as
//    (List, LOCKBIT:1).  We could also add a SUCCBIT or an explicit _succ
//    variable to provide the usual futile-wakeup optimization.
//    See RTStt for details.
// *  Consider schedctl.sc_nopreempt to cover the critical section.
//
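
// Editor's sketch (illustrative only, hypothetical names): the lock-word
// encoding described above.  The LSB is the lock bit and the remaining bits
// hold the head of the waiter list, so acquiring and enqueueing are each a
// single CAS on one word.  Waiter nodes are assumed to be at least 2-byte
// aligned so pointer values never collide with the lock bit.

#include <stdint.h>
#include <atomic>

struct SketchWaiter { SketchWaiter * next; };
static const uintptr_t SKETCH_LOCKBIT = 1;

// One acquire attempt: set the lock bit, preserving the waiter-list bits.
static bool sketch_mux_try_acquire(std::atomic<uintptr_t> * lock_word) {
  uintptr_t w = lock_word->load();
  return (w & SKETCH_LOCKBIT) == 0 &&
         lock_word->compare_exchange_strong(w, w | SKETCH_LOCKBIT);
}

// One enqueue attempt: push self onto the waiter list while the lock is held.
static bool sketch_mux_enqueue(std::atomic<uintptr_t> * lock_word,
                               SketchWaiter * self) {
  uintptr_t w = lock_word->load();
  if ((w & SKETCH_LOCKBIT) == 0) return false;   // lock is free: acquire instead
  self->next = (SketchWaiter *) (w & ~SKETCH_LOCKBIT);
  return lock_word->compare_exchange_strong(w, (uintptr_t) self | SKETCH_LOCKBIT);
}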

typedef volatile intptr_t MutexT ;      // Mux Lock-word
enum MuxBits { LOCKBIT = 1 } ;

void Thread::muxAcquire (volatile intptr_t * Lock, const char * LockName) {
  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
  if (w == 0) return ;
  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
    return ;
  }

  TEVENT (muxAcquire - Contention) ;
  ParkEvent * const Self = Thread::current()->_MuxEvent ;
  assert ((intptr_t(Self) & LOCKBIT) == 0, "invariant") ;
  for (;;) {
    int its = (os::is_MP() ? 100 : 0) + 1 ;

    // Optional spin phase: spin-then-park strategy
    while (--its >= 0) {
      w = *Lock ;
      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
        return ;
      }
    }

    Self->reset() ;
    Self->OnList = intptr_t(Lock) ;
    // The following fence() isn't _strictly necessary as the subsequent
    // CAS() both serializes execution and ratifies the fetched *Lock value.
    OrderAccess::fence();
    for (;;) {
      w = *Lock ;
      if ((w & LOCKBIT) == 0) {
        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
          Self->OnList = 0 ;   // hygiene - allows stronger asserts
          return ;
        }
        continue ;      // Interference -- *Lock changed -- Just retry
      }
      assert (w & LOCKBIT, "invariant") ;
      Self->ListNext = (ParkEvent *) (w & ~LOCKBIT) ;
      if (Atomic::cmpxchg_ptr (intptr_t(Self)|LOCKBIT, Lock, w) == w) break ;
    }

    while (Self->OnList != 0) {
      Self->park() ;
    }
  }
}

void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
  if (w == 0) return ;
  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
    return ;
  }

  TEVENT (muxAcquire - Contention) ;
  ParkEvent * ReleaseAfter = NULL ;
  if (ev == NULL) {
    ev = ReleaseAfter = ParkEvent::Allocate (NULL) ;
  }
  assert ((intptr_t(ev) & LOCKBIT) == 0, "invariant") ;
  for (;;) {
    guarantee (ev->OnList == 0, "invariant") ;
    int its = (os::is_MP() ? 100 : 0) + 1 ;

    // Optional spin phase: spin-then-park strategy
    while (--its >= 0) {
      w = *Lock ;
      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
        if (ReleaseAfter != NULL) {
          ParkEvent::Release (ReleaseAfter) ;
        }
        return ;
      }
    }

    ev->reset() ;
    ev->OnList = intptr_t(Lock) ;
    // The following fence() isn't _strictly necessary as the subsequent
    // CAS() both serializes execution and ratifies the fetched *Lock value.
    OrderAccess::fence();
    for (;;) {
      w = *Lock ;
      if ((w & LOCKBIT) == 0) {
        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
          ev->OnList = 0 ;
          // We call ::Release while holding the outer lock, thus
          // artificially lengthening the critical section.
          // Consider deferring the ::Release() until the subsequent unlock(),
          // after we've dropped the outer lock.
          if (ReleaseAfter != NULL) {
            ParkEvent::Release (ReleaseAfter) ;
          }
          return ;
        }
        continue ;      // Interference -- *Lock changed -- Just retry
      }
      assert (w & LOCKBIT, "invariant") ;
      ev->ListNext = (ParkEvent *) (w & ~LOCKBIT) ;
      if (Atomic::cmpxchg_ptr (intptr_t(ev)|LOCKBIT, Lock, w) == w) break ;
    }

    while (ev->OnList != 0) {
      ev->park() ;
    }
  }
}

// Release() must extract a successor from the list and then wake that thread.
// It can "pop" the front of the list or use a detach-modify-reattach (DMR)
// scheme similar to that used by ParkEvent::Allocate() and ::Release().
// A DMR-based Release() (sketched after muxRelease, below) would :
//   (A) CAS() or swap() null to *Lock, releasing the lock and detaching
//       the list.
//   (B) Extract a successor from the private list "in-hand".
//   (C) Attempt to CAS() the residual back into *Lock over null.
//       If there were any newly arrived threads the CAS() would fail.
//       In that case Release() would detach the recently arrived threads
//       (RATs), re-merge the list in-hand with the RATs and repeat as
//       needed.  Alternately, Release() might detach and extract a
//       successor, but then pass the residual list to the wakee.  The
//       wakee would be responsible for reattaching and remerging before
//       it competed for the lock.
//
// Both "pop" and DMR are immune from ABA corruption -- there can be
// multiple concurrent pushers, but only one popper or detacher.
// This implementation pops from the head of the list.  This is unfair,
// but tends to provide excellent throughput as hot threads remain hot.
// (We wake recently run threads first.)

void Thread::muxRelease (volatile intptr_t * Lock) {
  for (;;) {
    const intptr_t w = Atomic::cmpxchg_ptr (0, Lock, LOCKBIT) ;
    assert (w & LOCKBIT, "invariant") ;
    if (w == LOCKBIT) return ;
    ParkEvent * List = (ParkEvent *) (w & ~LOCKBIT) ;
    assert (List != NULL, "invariant") ;
    assert (List->OnList == intptr_t(Lock), "invariant") ;
    ParkEvent * nxt = List->ListNext ;

    // The following CAS() releases the lock and pops the head element.
    if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
      continue ;
    }
    List->OnList = 0 ;
    OrderAccess::fence() ;
    List->unpark () ;
    return ;
  }
}
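
// Editor's sketch (illustrative only, hypothetical names): the DMR-flavored
// Release() described in the comment block above, as an alternative to the
// pop-from-head CAS in muxRelease.  (A) swap 0 into the lock word, releasing
// the lock and detaching the waiter list; (B) extract a successor from the
// private list; (C) reattach the residual, merging any recently arrived
// threads (RATs).  Note the merge must preserve the lock bit, since another
// thread may have re-acquired the lock in the interim.

static SketchWaiter * sketch_mux_release_dmr(std::atomic<uintptr_t> * lock_word) {
  uintptr_t w = lock_word->exchange(0);          // (A) release and detach
  SketchWaiter * succ = (SketchWaiter *) (w & ~SKETCH_LOCKBIT);
  if (succ == NULL) return NULL;                 // no waiters to wake
  SketchWaiter * residual = succ->next;          // (B) extract the successor
  if (residual != NULL) {                        // (C) reattach the residual
    SketchWaiter * tail = residual;
    while (tail->next != NULL) tail = tail->next;
    for (;;) {
      uintptr_t cur = lock_word->load();
      tail->next = (SketchWaiter *) (cur & ~SKETCH_LOCKBIT);  // merge RATs
      if (lock_word->compare_exchange_strong(
            cur, (uintptr_t) residual | (cur & SKETCH_LOCKBIT))) break;
    }
  }
  return succ;                                   // caller unparks the successor
}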

void Threads::verify() {
  ALL_JAVA_THREADS(p) {
    p->verify();
  }