Wed, 12 Jan 2011 16:34:25 -0500
6994297: G1: do first-level slow-path allocations with a CAS
Summary: First attempt to allocate out of the current alloc region using a CAS instead of taking the Heap_lock (the first level of G1's slow allocation path). Only if that fails, and the current alloc region needs to be replaced, do we take the Heap_lock (the second level of G1's slow allocation path).
Reviewed-by: johnc, brutisso, ysr
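
The summary compresses a fair amount of protocol, so below is a minimal standalone model of the two-level scheme; it is a sketch, not HotSpot code. Region, heap_mutex, cur_region and new_region() are hypothetical stand-ins for HeapRegion, the Heap_lock, _cur_alloc_region and G1's region allocator, and all safepoint, GC and retirement logic is omitted.

#include <atomic>
#include <cstddef>
#include <mutex>

typedef size_t HeapWord;  // stand-in for HotSpot's word-sized unit

struct Region {
  std::atomic<HeapWord*> top;
  HeapWord* end;

  // Lock-free bump-pointer allocation, analogous to
  // ContiguousSpace::par_allocate(): CAS top forward and retry if
  // another thread moved it first.
  HeapWord* par_allocate(size_t word_size) {
    HeapWord* old_top = top.load(std::memory_order_relaxed);
    do {
      if (word_size > (size_t)(end - old_top)) {
        return NULL;  // not enough space left in this region
      }
    } while (!top.compare_exchange_weak(old_top, old_top + word_size));
    return old_top;
  }
};

static std::mutex heap_mutex;                 // stands in for the Heap_lock
static std::atomic<Region*> cur_region{NULL}; // stands in for _cur_alloc_region

// Toy replacement policy: carve a few fixed regions out of a static
// buffer, standing in for G1's free region list.
static Region* new_region() {
  static HeapWord storage[4][1024];
  static Region regions[4];
  static int next = 0;  // only called under heap_mutex
  if (next == 4) return NULL;
  Region* r = &regions[next];
  r->top.store(&storage[next][0]);
  r->end = &storage[next][0] + 1024;
  next++;
  return r;
}

static HeapWord* attempt_allocation(size_t word_size) {
  // First level: allocate with a CAS, no lock taken.
  Region* r = cur_region.load(std::memory_order_acquire);
  if (r != NULL) {
    HeapWord* result = r->par_allocate(word_size);
    if (result != NULL) return result;
  }

  // Second level: take the lock, re-read the region (someone may have
  // replaced it while we waited), retry, and only then replace it.
  std::lock_guard<std::mutex> guard(heap_mutex);
  r = cur_region.load(std::memory_order_acquire);
  if (r != NULL) {
    HeapWord* result = r->par_allocate(word_size);
    if (result != NULL) return result;
    // The real code retires the old region here (see the retirement
    // sketch after the first diff below); the sketch just drops it.
  }
  Region* fresh = new_region();
  if (fresh == NULL) return NULL;  // the real code would try a GC here
  // Allocate before publishing so no other thread can fill the fresh
  // region first; the release store keeps the bump of top visible
  // before the region itself becomes visible.
  HeapWord* result = fresh->par_allocate(word_size);
  cur_region.store(fresh, std::memory_order_release);
  return result;
}

The point of the first level is that the common case (region present, space available) touches no lock at all; threads only serialize on the mutex when the current region is missing or full.
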
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp  Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp  Wed Jan 12 16:34:25 2011 -0500
@@ -610,6 +610,39 @@
     // of the free region list is revamped as part of CR 6977804.
     wait_for_cleanup_complete();
 
+    // Other threads might still be trying to allocate using CASes out
+    // of the region we are retiring, as they can do so without holding
+    // the Heap_lock. So we first have to make sure that no one else can
+    // allocate in it by doing a maximal allocation. Even if our CAS
+    // attempt fails a few times, we'll succeed sooner or later given
+    // that a failed CAS attempt means that the region is getting closer
+    // to being full (someone else succeeded in allocating into it).
+    size_t free_word_size = cur_alloc_region->free() / HeapWordSize;
+
+    // This is the minimum free chunk we can turn into a dummy
+    // object. If the free space falls below this, then no one can
+    // allocate in this region anyway (all allocation requests will be
+    // of a size larger than this) so we won't have to perform the dummy
+    // allocation.
+    size_t min_word_size_to_fill = CollectedHeap::min_fill_size();
+
+    while (free_word_size >= min_word_size_to_fill) {
+      HeapWord* dummy =
+        cur_alloc_region->par_allocate_no_bot_updates(free_word_size);
+      if (dummy != NULL) {
+        // If the allocation was successful we should fill in the space.
+        CollectedHeap::fill_with_object(dummy, free_word_size);
+        break;
+      }
+
+      free_word_size = cur_alloc_region->free() / HeapWordSize;
+      // It's also possible that someone else beats us to the
+      // allocation and they fill up the region. In that case, we can
+      // just get out of the loop.
+    }
+    assert(cur_alloc_region->free() / HeapWordSize < min_word_size_to_fill,
+           "sanity");
+
     retire_cur_alloc_region_common(cur_alloc_region);
     assert(_cur_alloc_region == NULL, "post-condition");
   }
@@ -661,27 +694,29 @@
       // young type.
       OrderAccess::storestore();
 
-      // Now allocate out of the new current alloc region. We could
-      // have re-used allocate_from_cur_alloc_region() but its
-      // operation is slightly different to what we need here. First,
-      // allocate_from_cur_alloc_region() is only called outside a
-      // safepoint and will always unlock the Heap_lock if it returns
-      // a non-NULL result. Second, it assumes that the current alloc
-      // region is what's already assigned in _cur_alloc_region. What
-      // we want here is to actually do the allocation first before we
-      // assign the new region to _cur_alloc_region. This ordering is
-      // not currently important, but it will be essential when we
-      // change the code to support CAS allocation in the future (see
-      // CR 6994297).
-      //
-      // This allocate method does BOT updates and we don't need them in
-      // the young generation. This will be fixed in the near future by
-      // CR 6994297.
-      HeapWord* result = new_cur_alloc_region->allocate(word_size);
+      // Now, perform the allocation out of the region we just
+      // allocated. Note that no one else can access that region at
+      // this point (as _cur_alloc_region has not been updated yet),
+      // so we can just go ahead and do the allocation without any
+      // atomics (and we expect this allocation attempt to
+      // succeed). Given that other threads can attempt an allocation
+      // with a CAS and without needing the Heap_lock, if we assigned
+      // the new region to _cur_alloc_region before first allocating
+      // into it other threads might have filled up the new region
+      // before we got a chance to do the allocation ourselves. In
+      // that case, we would have needed to retire the region, grab a
+      // new one, and go through all this again. Allocating out of the
+      // new region before assigning it to _cur_alloc_region avoids
+      // all this.
+      HeapWord* result =
+        new_cur_alloc_region->allocate_no_bot_updates(word_size);
       assert(result != NULL, "we just allocated out of an empty region "
              "so allocation should have been successful");
       assert(is_in(result), "result should be in the heap");
 
+      // Now make sure that the store to _cur_alloc_region does not
+      // float above the store to top.
+      OrderAccess::storestore();
       _cur_alloc_region = new_cur_alloc_region;
 
       if (!at_safepoint) {
@@ -718,6 +753,9 @@
   for (int try_count = 1; /* we'll return or break */; try_count += 1) {
     bool succeeded = true;
 
+    // Every time we go round the loop we should be holding the Heap_lock.
+    assert_heap_locked();
+
     {
       // We may have concurrent cleanup working at the time. Wait for
       // it to complete. In the future we would probably want to make
@@ -734,7 +772,8 @@
       // attempt as it's redundant (we only reach here after an
       // allocation attempt has been unsuccessful).
       wait_for_cleanup_complete();
-      HeapWord* result = attempt_allocation(word_size);
+
+      HeapWord* result = attempt_allocation_locked(word_size);
       if (result != NULL) {
         assert_heap_not_locked();
         return result;
@@ -748,7 +787,6 @@
       if (g1_policy()->can_expand_young_list()) {
         // Yes, we are allowed to expand the young gen. Let's try to
         // allocate a new current alloc region.
-
         HeapWord* result =
           replace_cur_alloc_region_and_allocate(word_size,
                                                 false, /* at_safepoint */
@@ -771,20 +809,23 @@
       // rather than causing more, now probably unnecessary, GC attempts.
       JavaThread* jthr = JavaThread::current();
       assert(jthr != NULL, "sanity");
-      if (!jthr->in_critical()) {
-        MutexUnlocker mul(Heap_lock);
-        GC_locker::stall_until_clear();
-
-        // We'll then fall off the end of the ("if GC locker active")
-        // if-statement and retry the allocation further down in the
-        // loop.
-      } else {
+      if (jthr->in_critical()) {
         if (CheckJNICalls) {
          fatal("Possible deadlock due to allocating while"
                " in jni critical section");
        }
+        // We are returning NULL so the protocol is that we're still
+        // holding the Heap_lock.
+        assert_heap_locked();
        return NULL;
      }
+
+      Heap_lock->unlock();
+      GC_locker::stall_until_clear();
+
+      // No need to relock the Heap_lock. We'll fall off to the code
+      // below the else-statement which assumes that we are not
+      // holding the Heap_lock.
    } else {
      // We are not locked out. So, let's try to do a GC. The VM op
      // will retry the allocation before it completes.
@@ -805,11 +846,10 @@
          dirty_young_block(result, word_size);
          return result;
        }
-
-        Heap_lock->lock();
      }
 
-    assert_heap_locked();
+    // Both paths that get us here from above unlock the Heap_lock.
+    assert_heap_not_locked();
 
     // We can reach here when we were unsuccessful in doing a GC,
     // because another thread beat us to it, or because we were locked
@@ -948,10 +988,8 @@
   if (!expect_null_cur_alloc_region) {
     HeapRegion* cur_alloc_region = _cur_alloc_region;
     if (cur_alloc_region != NULL) {
-      // This allocate method does BOT updates and we don't need them in
-      // the young generation. This will be fixed in the near future by
-      // CR 6994297.
-      HeapWord* result = cur_alloc_region->allocate(word_size);
+      // We are at a safepoint so no reason to use the MT-safe version.
+      HeapWord* result = cur_alloc_region->allocate_no_bot_updates(word_size);
       if (result != NULL) {
         assert(is_in(result), "result should be in the heap");
 
@@ -983,20 +1021,17 @@
   assert_heap_not_locked_and_not_at_safepoint();
   assert(!isHumongous(word_size), "we do not allow TLABs of humongous size");
 
-  Heap_lock->lock();
-
-  // First attempt: try allocating out of the current alloc region or
-  // after replacing the current alloc region.
+  // First attempt: Try allocating out of the current alloc region
+  // using a CAS. If that fails, take the Heap_lock and retry the
+  // allocation, potentially replacing the current alloc region.
   HeapWord* result = attempt_allocation(word_size);
   if (result != NULL) {
     assert_heap_not_locked();
     return result;
   }
 
-  assert_heap_locked();
-
-  // Second attempt: go into the even slower path where we might
-  // try to schedule a collection.
+  // Second attempt: Go to the slower path where we might try to
+  // schedule a collection.
  result = attempt_allocation_slow(word_size);
  if (result != NULL) {
    assert_heap_not_locked();
@@ -1004,6 +1039,7 @@
  }
 
  assert_heap_locked();
+  // Need to unlock the Heap_lock before returning.
  Heap_lock->unlock();
  return NULL;
}
@@ -1022,11 +1058,10 @@
  for (int try_count = 1; /* we'll return */; try_count += 1) {
    unsigned int gc_count_before;
    {
-      Heap_lock->lock();
-
      if (!isHumongous(word_size)) {
-        // First attempt: try allocating out of the current alloc
-        // region or after replacing the current alloc region.
+        // First attempt: Try allocating out of the current alloc region
+        // using a CAS. If that fails, take the Heap_lock and retry the
+        // allocation, potentially replacing the current alloc region.
        HeapWord* result = attempt_allocation(word_size);
        if (result != NULL) {
          assert_heap_not_locked();
@@ -1035,14 +1070,17 @@
 
        assert_heap_locked();
 
-        // Second attempt: go into the even slower path where we might
-        // try to schedule a collection.
+        // Second attempt: Go to the slower path where we might try to
+        // schedule a collection.
        result = attempt_allocation_slow(word_size);
        if (result != NULL) {
          assert_heap_not_locked();
          return result;
        }
      } else {
+        // attempt_allocation_humongous() requires the Heap_lock to be held.
+        Heap_lock->lock();
+
        HeapWord* result = attempt_allocation_humongous(word_size,
                                                     false /* at_safepoint */);
        if (result != NULL) {
@@ -1054,7 +1092,8 @@
      assert_heap_locked();
      // Read the gc count while the heap lock is held.
      gc_count_before = SharedHeap::heap()->total_collections();
-      // We cannot be at a safepoint, so it is safe to unlock the Heap_lock
+
+      // Release the Heap_lock before attempting the collection.
      Heap_lock->unlock();
    }
 
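
The retirement dance in the first hunk above is worth isolating: a region that other threads may still be CASing into is closed by plugging all remaining space with one maximal dummy allocation. Below is a sketch of that protocol, reusing the Region type from the first sketch; min_fill_words and fill_with_dummy() are hypothetical stand-ins for CollectedHeap::min_fill_size() and CollectedHeap::fill_with_object().

static const size_t min_fill_words = 2;  // smallest formattable filler

static void fill_with_dummy(HeapWord* start, size_t word_size) {
  // HotSpot would format a real filler object here so the heap stays
  // parseable; zeroing the words is enough for the sketch.
  for (size_t i = 0; i < word_size; i++) start[i] = 0;
}

static void retire_region(Region* r) {
  // Other threads may still be CASing into r, so claim everything
  // that is left with one maximal allocation. Each failed CAS means
  // someone else allocated, so the free space shrinks monotonically
  // and the loop terminates.
  size_t free_words = (size_t)(r->end - r->top.load());
  while (free_words >= min_fill_words) {
    HeapWord* dummy = r->par_allocate(free_words);
    if (dummy != NULL) {
      fill_with_dummy(dummy, free_words);
      break;
    }
    // Lost the race: re-read the (now smaller) free space and retry.
    free_words = (size_t)(r->end - r->top.load());
  }
  // From here on no request can fit, because every allocation is at
  // least min_fill_words; the region can be retired safely.
}
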
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp  Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp  Wed Jan 12 16:34:25 2011 -0500
@@ -430,7 +430,8 @@
                               bool* gc_overhead_limit_was_exceeded);
 
   // The following methods, allocate_from_cur_alloc_region(),
-  // attempt_allocation(), replace_cur_alloc_region_and_allocate(),
+  // attempt_allocation(), attempt_allocation_locked(),
+  // replace_cur_alloc_region_and_allocate(),
   // attempt_allocation_slow(), and attempt_allocation_humongous()
   // have very awkward pre- and post-conditions with respect to
   // locking:
@@ -481,20 +482,30 @@
   // successfully manage to allocate it, or NULL.
 
   // It tries to satisfy an allocation request out of the current
-  // allocating region, which is passed as a parameter. It assumes
-  // that the caller has checked that the current allocating region is
-  // not NULL. Given that the caller has to check the current
-  // allocating region for at least NULL, it might as well pass it as
-  // the first parameter so that the method doesn't have to read it
-  // from the _cur_alloc_region field again.
+  // alloc region, which is passed as a parameter. It assumes that the
+  // caller has checked that the current alloc region is not NULL.
+  // Given that the caller has to check the current alloc region for
+  // at least NULL, it might as well pass it as the first parameter so
+  // that the method doesn't have to read it from the
+  // _cur_alloc_region field again. It is called from both
+  // attempt_allocation() and attempt_allocation_locked() and the
+  // with_heap_lock parameter indicates whether the caller was holding
+  // the Heap_lock when it called it or not.
   inline HeapWord* allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                  size_t word_size);
+                                                  size_t word_size,
+                                                  bool with_heap_lock);
 
-  // It attempts to allocate out of the current alloc region. If that
-  // fails, it retires the current alloc region (if there is one),
-  // tries to get a new one and retries the allocation.
+  // First level of the allocation slow path: it attempts to allocate
+  // out of the current alloc region in a lock-free manner using a CAS.
+  // If that fails it takes the Heap_lock and calls
+  // attempt_allocation_locked() for the second-level slow path.
   inline HeapWord* attempt_allocation(size_t word_size);
 
+  // Second level of the allocation slow path: while holding the
+  // Heap_lock it tries to allocate out of the current alloc region
+  // and, if that fails, tries to allocate out of a new current alloc region.
+  inline HeapWord* attempt_allocation_locked(size_t word_size);
+
   // It assumes that the current alloc region has been retired and
   // tries to allocate a new one. If it's successful, it performs the
   // allocation out of the new current alloc region and updates
@@ -506,11 +517,11 @@
                                                   bool do_dirtying,
                                                   bool can_expand);
 
-  // The slow path when we are unable to allocate a new current alloc
-  // region to satisfy an allocation request (i.e., when
-  // attempt_allocation() fails). It will try to do an evacuation
-  // pause, which might stall due to the GC locker, and retry the
-  // allocation attempt when appropriate.
+  // Third level of the allocation slow path: when we are unable to
+  // allocate a new current alloc region to satisfy an allocation
+  // request (i.e., when attempt_allocation_locked() fails). It will
+  // try to do an evacuation pause, which might stall due to the GC
+  // locker, and retry the allocation attempt when appropriate.
   HeapWord* attempt_allocation_slow(size_t word_size);
 
   // The method that tries to satisfy a humongous allocation
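
The with_heap_lock parameter documented above comes with an unusual contract: the callee asserts that the flag agrees with actual lock ownership and releases the Heap_lock itself on success. std::mutex cannot report its owner, so this sketch (building on the includes and Region type from the first sketch) tracks ownership in a thread_local flag; OwnedMutex is a hypothetical stand-in for HotSpot's Mutex with its owned_by_self() query.

#include <cassert>

struct OwnedMutex {
  std::mutex m;
  // One flag per thread; enough for a single global lock like the
  // Heap_lock, which is all this sketch models.
  static thread_local bool held_by_me;
  void lock()   { m.lock(); held_by_me = true; }
  void unlock() { held_by_me = false; m.unlock(); }
  bool owned_by_self() const { return held_by_me; }
};
thread_local bool OwnedMutex::held_by_me = false;

static OwnedMutex heap_lock;

static HeapWord* allocate_from_region(Region* r, size_t word_size,
                                      bool with_heap_lock) {
  // The caller states whether it holds the lock; cross-check it, as
  // allocate_from_cur_alloc_region() does against owned_by_self().
  assert((with_heap_lock == heap_lock.owned_by_self()) &&
         "with_heap_lock should be a tautology with owned_by_self()");
  HeapWord* result = r->par_allocate(word_size);
  if (result != NULL && with_heap_lock) {
    // Awkward but deliberate: the success path releases the caller's
    // lock so the post-allocation work can run without holding it.
    heap_lock.unlock();
  }
  return result;
}

Pushing the unlock into the callee keeps the post-allocation work (dirtying the young block) outside the critical section on the success path, at the cost of the asymmetric locking protocol the comment block above apologizes for.
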
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp  Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp  Wed Jan 12 16:34:25 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -63,10 +63,12 @@
// assumptions of this method (and other related ones).
inline HeapWord*
G1CollectedHeap::allocate_from_cur_alloc_region(HeapRegion* cur_alloc_region,
-                                                size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
+                                                size_t word_size,
+                                                bool with_heap_lock) {
+  assert_not_at_safepoint();
+  assert(with_heap_lock == Heap_lock->owned_by_self(),
+         "with_heap_lock and Heap_lock->owned_by_self() should be a tautology");
  assert(cur_alloc_region != NULL, "pre-condition of the method");
-  assert(cur_alloc_region == _cur_alloc_region, "pre-condition of the method");
  assert(cur_alloc_region->is_young(),
         "we only support young current alloc regions");
  assert(!isHumongous(word_size), "allocate_from_cur_alloc_region() "
@@ -76,20 +78,24 @@
  assert(!cur_alloc_region->is_empty(),
         err_msg("region ["PTR_FORMAT","PTR_FORMAT"] should not be empty",
                 cur_alloc_region->bottom(), cur_alloc_region->end()));
-  // This allocate method does BOT updates and we don't need them in
-  // the young generation. This will be fixed in the near future by
-  // CR 6994297.
-  HeapWord* result = cur_alloc_region->allocate(word_size);
+  HeapWord* result = cur_alloc_region->par_allocate_no_bot_updates(word_size);
  if (result != NULL) {
    assert(is_in(result), "result should be in the heap");
-    Heap_lock->unlock();
 
+    if (with_heap_lock) {
+      Heap_lock->unlock();
+    }
+    assert_heap_not_locked();
    // Do the dirtying after we release the Heap_lock.
    dirty_young_block(result, word_size);
    return result;
  }
 
-  assert_heap_locked();
+  if (with_heap_lock) {
+    assert_heap_locked();
+  } else {
+    assert_heap_not_locked();
+  }
  return NULL;
}
 
@@ -97,31 +103,27 @@
// assumptions of this method (and other related ones).
inline HeapWord*
G1CollectedHeap::attempt_allocation(size_t word_size) {
-  assert_heap_locked_and_not_at_safepoint();
+  assert_heap_not_locked_and_not_at_safepoint();
  assert(!isHumongous(word_size), "attempt_allocation() should not be called "
         "for humongous allocation requests");
 
  HeapRegion* cur_alloc_region = _cur_alloc_region;
  if (cur_alloc_region != NULL) {
    HeapWord* result = allocate_from_cur_alloc_region(cur_alloc_region,
-                                                      word_size);
+                                                      word_size,
+                                                      false /* with_heap_lock */);
+    assert_heap_not_locked();
    if (result != NULL) {
-      assert_heap_not_locked();
      return result;
    }
-
-    assert_heap_locked();
-
-    // Since we couldn't successfully allocate into it, retire the
-    // current alloc region.
-    retire_cur_alloc_region(cur_alloc_region);
  }
 
-  // Try to get a new region and allocate out of it
-  HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
-                                                           false, /* at_safepoint */
-                                                           true,  /* do_dirtying */
-                                                           false  /* can_expand */);
+  // Our attempt to allocate lock-free failed as the current
+  // allocation region is either NULL or full. So, we'll now take the
+  // Heap_lock and retry.
+  Heap_lock->lock();
+
+  HeapWord* result = attempt_allocation_locked(word_size);
  if (result != NULL) {
    assert_heap_not_locked();
    return result;
@@ -145,6 +147,45 @@
  _cur_alloc_region = NULL;
}
 
+inline HeapWord*
+G1CollectedHeap::attempt_allocation_locked(size_t word_size) {
+  assert_heap_locked_and_not_at_safepoint();
+  assert(!isHumongous(word_size), "attempt_allocation_locked() "
+         "should not be called for humongous allocation requests");
+
+  // First, reread the current alloc region and retry the allocation
+  // in case somebody replaced it while we were waiting to get the
+  // Heap_lock.
+  HeapRegion* cur_alloc_region = _cur_alloc_region;
+  if (cur_alloc_region != NULL) {
+    HeapWord* result = allocate_from_cur_alloc_region(
+                                                  cur_alloc_region, word_size,
+                                                  true /* with_heap_lock */);
+    if (result != NULL) {
+      assert_heap_not_locked();
+      return result;
+    }
+
+    // We failed to allocate out of the current alloc region, so let's
+    // retire it before getting a new one.
+    retire_cur_alloc_region(cur_alloc_region);
+  }
+
+  assert_heap_locked();
+  // Try to get a new region and allocate out of it.
+  HeapWord* result = replace_cur_alloc_region_and_allocate(word_size,
+                                                           false, /* at_safepoint */
+                                                           true,  /* do_dirtying */
+                                                           false  /* can_expand */);
+  if (result != NULL) {
+    assert_heap_not_locked();
+    return result;
+  }
+
+  assert_heap_locked();
+  return NULL;
+}
+
// It dirties the cards that cover the block so that the post
// write barrier never queues anything when updating objects on this
// block. It is assumed (and in fact we assert) that the block
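
One ordering subtlety the inline code silently relies on: replace_cur_alloc_region_and_allocate() bumps the new region's top before publishing the region, and the second OrderAccess::storestore() added in the .cpp diff keeps those two stores in that order. If the publication became visible first, another thread's CAS could hand out the very words just allocated. A sketch with a C++11 fence in place of OrderAccess, reusing Region and cur_region from the first sketch:

static HeapWord* allocate_then_publish(Region* fresh, size_t word_size) {
  // 1. Private allocation: nobody can see 'fresh' yet, so a plain
  //    (non-CAS) bump of top is safe and is expected to succeed.
  HeapWord* result = fresh->top.load(std::memory_order_relaxed);
  fresh->top.store(result + word_size, std::memory_order_relaxed);

  // 2. storestore: the bump of top must become visible no later than
  //    the region itself.
  std::atomic_thread_fence(std::memory_order_release);

  // 3. Publish: from this point other threads may CAS into 'fresh'.
  cur_region.store(fresh, std::memory_order_relaxed);
  return result;
}
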
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp  Wed Jan 12 13:06:00 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp  Wed Jan 12 16:34:25 2011 -0500
@@ -372,6 +372,15 @@
    Allocated
  };
 
+  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
+    assert(is_young(), "we can only skip BOT updates on young regions");
+    return ContiguousSpace::par_allocate(word_size);
+  }
+  inline HeapWord* allocate_no_bot_updates(size_t word_size) {
+    assert(is_young(), "we can only skip BOT updates on young regions");
+    return ContiguousSpace::allocate(word_size);
+  }
+
  // If this region is a member of a HeapRegionSeq, the index in that
  // sequence, otherwise -1.
  int hrs_index() const { return _hrs_index; }
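
The pair of helpers added to HeapRegion mirrors the existing par_allocate()/allocate() split in ContiguousSpace: the parallel variant CASes top (as in the first sketch), while the serial one may do a plain bump because the caller guarantees exclusion, either at a safepoint or on a region not yet published. Both skip block-offset-table updates, which is safe only for young regions (hence the asserts), since the BOT is only needed to parse regions whose cards are scanned. A sketch of the serial variant on the sketch's Region type (allocate_serial is a hypothetical name):

static HeapWord* allocate_serial(Region* r, size_t word_size) {
  HeapWord* old_top = r->top.load(std::memory_order_relaxed);
  if (word_size > (size_t)(r->end - old_top)) {
    return NULL;
  }
  // No CAS: correctness relies on the caller's exclusion guarantee,
  // just as allocate_no_bot_updates() relies on running at a safepoint
  // or on an as-yet-unpublished region.
  r->top.store(old_top + word_size, std::memory_order_relaxed);
  return old_top;
}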