/*
 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_parCardTableModRefBS.cpp.incl"

// Parallel iteration over the non-clean cards of "mr" within "sp".
// The region's cards are divided into n_threads * StridesPerThread
// strides; each calling thread repeatedly claims a stride index from
// the space's SequentialSubTasksDone and processes it via
// process_stride().  The thread that observes all tasks completed
// clears the lowest_non_clean (LNC) entries covering "mr" so the
// array is ready for the next cycle.
void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
                                                        DirtyCardToOopClosure* dcto_cl,
                                                        MemRegionClosure* cl,
                                                        bool clear,
                                                        int n_threads) {
  if (n_threads > 0) {
    assert(n_threads == (int)ParallelGCThreads, "# worker threads != # requested!");

    // Make sure the LNC array is valid for the space.
    jbyte**   lowest_non_clean;
    uintptr_t lowest_non_clean_base_chunk_index;
    size_t    lowest_non_clean_chunk_size;
    get_LNC_array_for_space(sp, lowest_non_clean,
                            lowest_non_clean_base_chunk_index,
                            lowest_non_clean_chunk_size);

    int n_strides = n_threads * StridesPerThread;
    SequentialSubTasksDone* pst = sp->par_seq_tasks();
    pst->set_par_threads(n_threads);
    pst->set_n_tasks(n_strides);

    // Claim and process strides until none remain.  is_task_claimed()
    // writes the claimed index into "stride" (passed by reference).
    int stride = 0;
    while (!pst->is_task_claimed(/* reference */ stride)) {
      process_stride(sp, mr, stride, n_strides, dcto_cl, cl, clear,
                     lowest_non_clean,
                     lowest_non_clean_base_chunk_index,
                     lowest_non_clean_chunk_size);
    }
    if (pst->all_tasks_completed()) {
      // Clear lowest_non_clean array for next time.
      intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
      uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
      for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
        intptr_t ind = ch - lowest_non_clean_base_chunk_index;
        assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
               "Bounds error");
        lowest_non_clean[ind] = NULL;
      }
    }
  }
}

// Processes every chunk of cards belonging to the given "stride":
// starting from the first chunk at or after used.start() whose chunk
// index is congruent to "stride" mod n_strides, and advancing by
// CardsPerStrideChunk * n_strides cards each step until the end of
// the used region.  For each chunk, boundary conditions are fixed up
// by process_chunk_boundaries() before the cards are scanned with
// non_clean_card_iterate_work().
void
CardTableModRefBS::
process_stride(Space* sp,
               MemRegion used,
               jint stride, int n_strides,
               DirtyCardToOopClosure* dcto_cl,
               MemRegionClosure* cl,
               bool clear,
               jbyte** lowest_non_clean,
               uintptr_t lowest_non_clean_base_chunk_index,
               size_t lowest_non_clean_chunk_size) {
  // We don't have to go downwards here; it wouldn't help anyway,
  // because of parallelism.

  // Find the first card address of the first chunk in the stride that is
  // at least "bottom" of the used region.
  jbyte* start_card = byte_for(used.start());
  jbyte* end_card   = byte_after(used.last());
  uintptr_t start_chunk = addr_to_chunk_index(used.start());
  uintptr_t start_chunk_stride_num = start_chunk % n_strides;
  jbyte* chunk_card_start;

  if ((uintptr_t)stride >= start_chunk_stride_num) {
    chunk_card_start = (jbyte*)(start_card +
                                (stride - start_chunk_stride_num) *
                                CardsPerStrideChunk);
  } else {
    // Go ahead to the next chunk group boundary, then to the requested stride.
    chunk_card_start = (jbyte*)(start_card +
                                (n_strides - start_chunk_stride_num + stride) *
                                CardsPerStrideChunk);
  }

  while (chunk_card_start < end_card) {
    // We don't have to go downwards here; it wouldn't help anyway,
    // because of parallelism.  (We take care with "min_done"; see below.)
    // Invariant: chunk_mr should be fully contained within the "used" region.
    jbyte* chunk_card_end = chunk_card_start + CardsPerStrideChunk;
    MemRegion chunk_mr = MemRegion(addr_for(chunk_card_start),
                                   chunk_card_end >= end_card ?
                                     used.end() : addr_for(chunk_card_end));
    assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
    assert(used.contains(chunk_mr), "chunk_mr should be subset of used");

    // Process the chunk.
    process_chunk_boundaries(sp,
                             dcto_cl,
                             chunk_mr,
                             used,
                             lowest_non_clean,
                             lowest_non_clean_base_chunk_index,
                             lowest_non_clean_chunk_size);

    non_clean_card_iterate_work(chunk_mr, cl, clear);

    // Find the next chunk of the stride.
    chunk_card_start += CardsPerStrideChunk * n_strides;
  }
}

// Fixes up the boundary conditions of "chunk_mr" before its cards are
// scanned.  Two concerns: (1) compute "max_to_do", the address beyond
// which this chunk's scan must not proceed because the following chunk
// (processed by another stride) is responsible for it — this is
// installed into the closure via set_min_done(); (2) if an object
// spans into this chunk from the previous one, record the first card
// of this chunk that will be scanned into the lowest_non_clean (LNC)
// array entry for this chunk, so the owner of the previous chunk can
// bound its own processing.
void
CardTableModRefBS::
process_chunk_boundaries(Space* sp,
                         DirtyCardToOopClosure* dcto_cl,
                         MemRegion chunk_mr,
                         MemRegion used,
                         jbyte** lowest_non_clean,
                         uintptr_t lowest_non_clean_base_chunk_index,
                         size_t lowest_non_clean_chunk_size)
{
  // We must worry about the chunk boundaries.

  // First, set our max_to_do:
  HeapWord* max_to_do = NULL;
  uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start());
  // Rebase to an index into the lowest_non_clean array.
  cur_chunk_index = cur_chunk_index - lowest_non_clean_base_chunk_index;

  if (chunk_mr.end() < used.end()) {
    // This is not the last chunk in the used region.  What is the last
    // object?
    HeapWord* last_block = sp->block_start(chunk_mr.end());
    assert(last_block <= chunk_mr.end(), "In case this property changes.");
    if (last_block == chunk_mr.end()
        || !sp->block_is_obj(last_block)) {
      max_to_do = chunk_mr.end();

    } else {
      // It is an object and starts before the end of the current chunk.
      // last_obj_card is the card corresponding to the start of the last object
      // in the chunk.  Note that the last object may not start in
      // the chunk.
      jbyte* last_obj_card = byte_for(last_block);
      if (!card_may_have_been_dirty(*last_obj_card)) {
        // The card containing the head is not dirty.  Any marks in
        // subsequent cards still in this chunk must have been made
        // precisely; we can cap processing at the end.
        max_to_do = chunk_mr.end();
      } else {
        // The last object must be considered dirty, and extends onto the
        // following chunk.  Look for a dirty card in that chunk that will
        // bound our processing.
        jbyte* limit_card = NULL;
        size_t last_block_size = sp->block_size(last_block);
        jbyte* last_card_of_last_obj =
          byte_for(last_block + last_block_size - 1);
        jbyte* first_card_of_next_chunk = byte_for(chunk_mr.end());
        // This search potentially goes a long distance looking
        // for the next card that will be scanned.  For example,
        // an object that is an array of primitives will not
        // have any cards covering regions interior to the array
        // that will need to be scanned.  The scan can be terminated
        // at the last card of the next chunk.  That would leave
        // limit_card as NULL and would result in "max_to_do"
        // being set with the LNC value or with the end
        // of the last block.
        jbyte* last_card_of_next_chunk = first_card_of_next_chunk +
          CardsPerStrideChunk;
        assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start())
               == CardsPerStrideChunk, "last card of next chunk may be wrong");
        jbyte* last_card_to_check = (jbyte*) MIN2(last_card_of_last_obj,
                                                  last_card_of_next_chunk);
        for (jbyte* cur = first_card_of_next_chunk;
             cur <= last_card_to_check; cur++) {
          if (card_will_be_scanned(*cur)) {
            limit_card = cur; break;
          }
        }
        assert(0 <= cur_chunk_index+1 &&
               cur_chunk_index+1 < lowest_non_clean_chunk_size,
               "Bounds error.");
        // LNC for the next chunk
        jbyte* lnc_card = lowest_non_clean[cur_chunk_index+1];
        if (limit_card == NULL) {
          limit_card = lnc_card;
        }
        if (limit_card != NULL) {
          // If both a scanned card and an LNC card exist, the earlier
          // of the two bounds our processing.
          if (lnc_card != NULL) {
            limit_card = (jbyte*)MIN2((intptr_t)limit_card,
                                      (intptr_t)lnc_card);
          }
          max_to_do = addr_for(limit_card);
        } else {
          // No bounding card found; process to the end of the last object.
          max_to_do = last_block + last_block_size;
        }
      }
    }
    assert(max_to_do != NULL, "OOPS!");
  } else {
    // Last chunk in the used region: process to the end of "used".
    max_to_do = used.end();
  }
  // Now we can set the closure we're using so it doesn't go beyond
  // max_to_do.
  dcto_cl->set_min_done(max_to_do);
#ifndef PRODUCT
  dcto_cl->set_last_bottom(max_to_do);
#endif

  // Now we set *our* lowest_non_clean entry.
  // Find the object that spans our boundary, if one exists.
  // Nothing to do on the first chunk.
  if (chunk_mr.start() > used.start()) {
    // first_block is the block possibly spanning the chunk start
    HeapWord* first_block = sp->block_start(chunk_mr.start());
    // Does the block span the start of the chunk and is it
    // an object?
    if (first_block < chunk_mr.start() &&
        sp->block_is_obj(first_block)) {
      jbyte* first_dirty_card = NULL;
      jbyte* last_card_of_first_obj =
        byte_for(first_block + sp->block_size(first_block) - 1);
      jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
      jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last());
      // Search no further than the end of this chunk or the end of the
      // spanning object, whichever comes first.
      jbyte* last_card_to_check =
        (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk,
                      (intptr_t) last_card_of_first_obj);
      for (jbyte* cur = first_card_of_cur_chunk;
           cur <= last_card_to_check; cur++) {
        if (card_will_be_scanned(*cur)) {
          first_dirty_card = cur; break;
        }
      }
      if (first_dirty_card != NULL) {
        assert(0 <= cur_chunk_index &&
               cur_chunk_index < lowest_non_clean_chunk_size,
               "Bounds error.");
        lowest_non_clean[cur_chunk_index] = first_dirty_card;
      }
    }
  }
}

// Returns, via the reference out-parameters, the lowest_non_clean
// (LNC) array, its base chunk index, and its size for the covered
// region containing sp->bottom().  The array is (re)allocated at most
// once per collection, guarded by ParGCRareEvent_lock with a
// double-checked read of _last_LNC_resizing_collection to avoid
// taking the lock on the common path.
void
CardTableModRefBS::
get_LNC_array_for_space(Space* sp,
                        jbyte**& lowest_non_clean,
                        uintptr_t& lowest_non_clean_base_chunk_index,
                        size_t& lowest_non_clean_chunk_size) {

  int i = find_covering_region_containing(sp->bottom());
  MemRegion covered = _covered[i];
  size_t n_chunks = chunks_to_cover(covered);

  // Only the first thread to obtain the lock will resize the
  // LNC array for the covered region.  Any later expansion can't affect
  // the used_at_save_marks region.
  // (I observed a bug in which the first thread to execute this would
  // resize, and then it would cause "expand_and_allocates" that would
  // increase the number of chunks in the covered region.  Then a second
  // thread would come and execute this, see that the size didn't match,
  // and free and allocate again.  So the first thread would be using a
  // freed "_lowest_non_clean" array.)

  // Do a dirty read here.  If we pass the conditional then take the rare
  // event lock and do the read again in case some other thread had already
  // succeeded and done the resize.
  int cur_collection = Universe::heap()->total_collections();
  if (_last_LNC_resizing_collection[i] != cur_collection) {
    MutexLocker x(ParGCRareEvent_lock);
    if (_last_LNC_resizing_collection[i] != cur_collection) {
      if (_lowest_non_clean[i] == NULL ||
          n_chunks != _lowest_non_clean_chunk_size[i]) {

        // Should we delete the old?
        if (_lowest_non_clean[i] != NULL) {
          assert(n_chunks != _lowest_non_clean_chunk_size[i],
                 "logical consequence");
          FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i]);
          _lowest_non_clean[i] = NULL;
        }
        // Now allocate a new one if necessary.
        if (_lowest_non_clean[i] == NULL) {
          _lowest_non_clean[i]                  = NEW_C_HEAP_ARRAY(CardPtr, n_chunks);
          _lowest_non_clean_chunk_size[i]       = n_chunks;
          _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
          for (int j = 0; j < (int)n_chunks; j++)
            _lowest_non_clean[i][j] = NULL;
        }
      }
      _last_LNC_resizing_collection[i] = cur_collection;
    }
  }
  // In any case, now do the initialization.
  lowest_non_clean                  = _lowest_non_clean[i];
  lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i];
  lowest_non_clean_chunk_size       = _lowest_non_clean_chunk_size[i];
}