Tue, 10 May 2011 00:33:21 -0700
6883834: ParNew: assert(!_g->to()->is_in_reserved(obj),"Scanning field twice?") with LargeObjects tests
Summary: Fixed process_chunk_boundaries(), used for parallel card scanning when using ParNew/CMS, so as to prevent double-scanning, or worse, non-scanning of imprecisely marked objects exceeding parallel chunk size. Made some sizing parameters for parallel card scanning diagnostic, disabled ParallelGCRetainPLAB, and elaborated and clarified some comments.
Reviewed-by: stefank, johnc
duke@435 | 1 | /* |
ysr@2788 | 2 | * Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #include "precompiled.hpp" |
stefank@2314 | 26 | #include "memory/allocation.inline.hpp" |
stefank@2314 | 27 | #include "memory/cardTableModRefBS.hpp" |
stefank@2314 | 28 | #include "memory/cardTableRS.hpp" |
stefank@2314 | 29 | #include "memory/sharedHeap.hpp" |
stefank@2314 | 30 | #include "memory/space.inline.hpp" |
stefank@2314 | 31 | #include "memory/universe.hpp" |
ysr@2889 | 32 | #include "oops/oop.inline.hpp" |
stefank@2314 | 33 | #include "runtime/java.hpp" |
stefank@2314 | 34 | #include "runtime/mutexLocker.hpp" |
stefank@2314 | 35 | #include "runtime/virtualspace.hpp" |
duke@435 | 36 | |
ysr@2819 | 37 | void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, |
ysr@2889 | 38 | OopsInGenClosure* cl, |
ysr@2889 | 39 | CardTableRS* ct, |
ysr@2819 | 40 | int n_threads) { |
ysr@2819 | 41 | assert(n_threads > 0, "Error: expected n_threads > 0"); |
ysr@2819 | 42 | assert((n_threads == 1 && ParallelGCThreads == 0) || |
ysr@2819 | 43 | n_threads <= (int)ParallelGCThreads, |
ysr@2819 | 44 | "# worker threads != # requested!"); |
ysr@2819 | 45 | // Make sure the LNC array is valid for the space. |
ysr@2819 | 46 | jbyte** lowest_non_clean; |
ysr@2819 | 47 | uintptr_t lowest_non_clean_base_chunk_index; |
ysr@2819 | 48 | size_t lowest_non_clean_chunk_size; |
ysr@2819 | 49 | get_LNC_array_for_space(sp, lowest_non_clean, |
ysr@2819 | 50 | lowest_non_clean_base_chunk_index, |
ysr@2819 | 51 | lowest_non_clean_chunk_size); |
duke@435 | 52 | |
ysr@2889 | 53 | int n_strides = n_threads * ParGCStridesPerThread; |
ysr@2819 | 54 | SequentialSubTasksDone* pst = sp->par_seq_tasks(); |
ysr@2819 | 55 | pst->set_n_threads(n_threads); |
ysr@2819 | 56 | pst->set_n_tasks(n_strides); |
duke@435 | 57 | |
ysr@2819 | 58 | int stride = 0; |
ysr@2819 | 59 | while (!pst->is_task_claimed(/* reference */ stride)) { |
ysr@2889 | 60 | process_stride(sp, mr, stride, n_strides, cl, ct, |
ysr@2819 | 61 | lowest_non_clean, |
ysr@2819 | 62 | lowest_non_clean_base_chunk_index, |
ysr@2819 | 63 | lowest_non_clean_chunk_size); |
ysr@2819 | 64 | } |
ysr@2819 | 65 | if (pst->all_tasks_completed()) { |
ysr@2819 | 66 | // Clear lowest_non_clean array for next time. |
ysr@2819 | 67 | intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); |
ysr@2819 | 68 | uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); |
ysr@2819 | 69 | for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { |
ysr@2819 | 70 | intptr_t ind = ch - lowest_non_clean_base_chunk_index; |
ysr@2819 | 71 | assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, |
ysr@2819 | 72 | "Bounds error"); |
ysr@2819 | 73 | lowest_non_clean[ind] = NULL; |
duke@435 | 74 | } |
duke@435 | 75 | } |
duke@435 | 76 | } |
duke@435 | 77 | |
duke@435 | 78 | void |
duke@435 | 79 | CardTableModRefBS:: |
duke@435 | 80 | process_stride(Space* sp, |
duke@435 | 81 | MemRegion used, |
duke@435 | 82 | jint stride, int n_strides, |
ysr@2889 | 83 | OopsInGenClosure* cl, |
ysr@2889 | 84 | CardTableRS* ct, |
duke@435 | 85 | jbyte** lowest_non_clean, |
duke@435 | 86 | uintptr_t lowest_non_clean_base_chunk_index, |
duke@435 | 87 | size_t lowest_non_clean_chunk_size) { |
ysr@2889 | 88 | // We go from higher to lower addresses here; it wouldn't help that much |
ysr@2889 | 89 | // because of the strided parallelism pattern used here. |
duke@435 | 90 | |
duke@435 | 91 | // Find the first card address of the first chunk in the stride that is |
duke@435 | 92 | // at least "bottom" of the used region. |
duke@435 | 93 | jbyte* start_card = byte_for(used.start()); |
duke@435 | 94 | jbyte* end_card = byte_after(used.last()); |
duke@435 | 95 | uintptr_t start_chunk = addr_to_chunk_index(used.start()); |
duke@435 | 96 | uintptr_t start_chunk_stride_num = start_chunk % n_strides; |
duke@435 | 97 | jbyte* chunk_card_start; |
duke@435 | 98 | |
duke@435 | 99 | if ((uintptr_t)stride >= start_chunk_stride_num) { |
duke@435 | 100 | chunk_card_start = (jbyte*)(start_card + |
duke@435 | 101 | (stride - start_chunk_stride_num) * |
ysr@2889 | 102 | ParGCCardsPerStrideChunk); |
duke@435 | 103 | } else { |
duke@435 | 104 | // Go ahead to the next chunk group boundary, then to the requested stride. |
duke@435 | 105 | chunk_card_start = (jbyte*)(start_card + |
duke@435 | 106 | (n_strides - start_chunk_stride_num + stride) * |
ysr@2889 | 107 | ParGCCardsPerStrideChunk); |
duke@435 | 108 | } |
duke@435 | 109 | |
duke@435 | 110 | while (chunk_card_start < end_card) { |
ysr@2889 | 111 | // Even though we go from lower to higher addresses below, the |
ysr@2889 | 112 | // strided parallelism can interleave the actual processing of the |
ysr@2889 | 113 | // dirty pages in various ways. For a specific chunk within this |
ysr@2889 | 114 | // stride, we take care to avoid double scanning or missing a card |
ysr@2889 | 115 | // by suitably initializing the "min_done" field in process_chunk_boundaries() |
ysr@2889 | 116 | // below, together with the dirty region extension accomplished in |
ysr@2889 | 117 | // DirtyCardToOopClosure::do_MemRegion(). |
ysr@2889 | 118 | jbyte* chunk_card_end = chunk_card_start + ParGCCardsPerStrideChunk; |
duke@435 | 119 | // Invariant: chunk_mr should be fully contained within the "used" region. |
duke@435 | 120 | MemRegion chunk_mr = MemRegion(addr_for(chunk_card_start), |
duke@435 | 121 | chunk_card_end >= end_card ? |
duke@435 | 122 | used.end() : addr_for(chunk_card_end)); |
duke@435 | 123 | assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)"); |
duke@435 | 124 | assert(used.contains(chunk_mr), "chunk_mr should be subset of used"); |
duke@435 | 125 | |
ysr@2889 | 126 | DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), |
ysr@2889 | 127 | cl->gen_boundary()); |
ysr@2889 | 128 | ClearNoncleanCardWrapper clear_cl(dcto_cl, ct); |
ysr@2889 | 129 | |
ysr@2889 | 130 | |
duke@435 | 131 | // Process the chunk. |
duke@435 | 132 | process_chunk_boundaries(sp, |
duke@435 | 133 | dcto_cl, |
duke@435 | 134 | chunk_mr, |
duke@435 | 135 | used, |
duke@435 | 136 | lowest_non_clean, |
duke@435 | 137 | lowest_non_clean_base_chunk_index, |
duke@435 | 138 | lowest_non_clean_chunk_size); |
duke@435 | 139 | |
ysr@2889 | 140 | // We want the LNC array updates above in process_chunk_boundaries |
ysr@2889 | 141 | // to be visible before any of the card table value changes as a |
ysr@2889 | 142 | // result of the dirty card iteration below. |
ysr@2889 | 143 | OrderAccess::storestore(); |
ysr@2889 | 144 | |
ysr@2819 | 145 | // We do not call the non_clean_card_iterate_serial() version because |
ysr@2889 | 146 | // we want to clear the cards: clear_cl here does the work of finding |
ysr@2889 | 147 | // contiguous dirty ranges of cards to process and clear. |
ysr@2889 | 148 | clear_cl.do_MemRegion(chunk_mr); |
duke@435 | 149 | |
duke@435 | 150 | // Find the next chunk of the stride. |
ysr@2889 | 151 | chunk_card_start += ParGCCardsPerStrideChunk * n_strides; |
duke@435 | 152 | } |
duke@435 | 153 | } |
duke@435 | 154 | |
ysr@2889 | 155 | |
ysr@2889 | 156 | // If you want a talkative process_chunk_boundaries, |
ysr@2889 | 157 | // then #define NOISY(x) x |
ysr@2889 | 158 | #ifdef NOISY |
ysr@2889 | 159 | #error "Encountered a global preprocessor flag, NOISY, which might clash with local definition to follow" |
ysr@2889 | 160 | #else |
ysr@2889 | 161 | #define NOISY(x) |
ysr@2889 | 162 | #endif |
ysr@2889 | 163 | |
duke@435 | 164 | void |
duke@435 | 165 | CardTableModRefBS:: |
duke@435 | 166 | process_chunk_boundaries(Space* sp, |
duke@435 | 167 | DirtyCardToOopClosure* dcto_cl, |
duke@435 | 168 | MemRegion chunk_mr, |
duke@435 | 169 | MemRegion used, |
duke@435 | 170 | jbyte** lowest_non_clean, |
duke@435 | 171 | uintptr_t lowest_non_clean_base_chunk_index, |
duke@435 | 172 | size_t lowest_non_clean_chunk_size) |
duke@435 | 173 | { |
ysr@2889 | 174 | // We must worry about non-array objects that cross chunk boundaries, |
ysr@2889 | 175 | // because such objects are both precisely and imprecisely marked: |
ysr@2889 | 176 | // .. if the head of such an object is dirty, the entire object |
ysr@2889 | 177 | // needs to be scanned, under the interpretation that this |
ysr@2889 | 178 | // was an imprecise mark |
ysr@2889 | 179 | // .. if the head of such an object is not dirty, we can assume |
ysr@2889 | 180 | // precise marking and it's efficient to scan just the dirty |
ysr@2889 | 181 | // cards. |
ysr@2889 | 182 | // In either case, each scanned reference must be scanned precisely |
ysr@2889 | 183 | // once so as to avoid cloning of a young referent. For efficiency, |
ysr@2889 | 184 | // our closures depend on this property and do not protect against |
ysr@2889 | 185 | // double scans. |
duke@435 | 186 | |
duke@435 | 187 | uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start()); |
duke@435 | 188 | cur_chunk_index = cur_chunk_index - lowest_non_clean_base_chunk_index; |
duke@435 | 189 | |
ysr@2889 | 190 | NOISY(tty->print_cr("===========================================================================");) |
ysr@2889 | 191 | NOISY(tty->print_cr(" process_chunk_boundary: Called with [" PTR_FORMAT "," PTR_FORMAT ")", |
ysr@2889 | 192 | chunk_mr.start(), chunk_mr.end());) |
ysr@2889 | 193 | |
ysr@2889 | 194 | // First, set "our" lowest_non_clean entry, which would be |
ysr@2889 | 195 | // used by the thread scanning an adjoining left chunk with |
ysr@2889 | 196 | // a non-array object straddling the mutual boundary. |
ysr@2889 | 197 | // Find the object that spans our boundary, if one exists. |
ysr@2889 | 198 | // first_block is the block possibly straddling our left boundary. |
ysr@2889 | 199 | HeapWord* first_block = sp->block_start(chunk_mr.start()); |
ysr@2889 | 200 | assert((chunk_mr.start() != used.start()) || (first_block == chunk_mr.start()), |
ysr@2889 | 201 | "First chunk should always have a co-initial block"); |
ysr@2889 | 202 | // Does the block straddle the chunk's left boundary, and is it |
ysr@2889 | 203 | // a non-array object? |
ysr@2889 | 204 | if (first_block < chunk_mr.start() // first block straddles left bdry |
ysr@2889 | 205 | && sp->block_is_obj(first_block) // first block is an object |
ysr@2889 | 206 | && !(oop(first_block)->is_objArray() // first block is not an array (arrays are precisely dirtied) |
ysr@2889 | 207 | || oop(first_block)->is_typeArray())) { |
ysr@2889 | 208 | // Find our least non-clean card, so that a left neighbour |
ysr@2889 | 209 | // does not scan an object straddling the mutual boundary |
ysr@2889 | 210 | // too far to the right, and attempt to scan a portion of |
ysr@2889 | 211 | // that object twice. |
ysr@2889 | 212 | jbyte* first_dirty_card = NULL; |
ysr@2889 | 213 | jbyte* last_card_of_first_obj = |
ysr@2889 | 214 | byte_for(first_block + sp->block_size(first_block) - 1); |
ysr@2889 | 215 | jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start()); |
ysr@2889 | 216 | jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last()); |
ysr@2889 | 217 | jbyte* last_card_to_check = |
ysr@2889 | 218 | (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk, |
ysr@2889 | 219 | (intptr_t) last_card_of_first_obj); |
ysr@2889 | 220 | // Note that this does not need to go beyond our last card |
ysr@2889 | 221 | // if our first object completely straddles this chunk. |
ysr@2889 | 222 | for (jbyte* cur = first_card_of_cur_chunk; |
ysr@2889 | 223 | cur <= last_card_to_check; cur++) { |
ysr@2889 | 224 | jbyte val = *cur; |
ysr@2889 | 225 | if (card_will_be_scanned(val)) { |
ysr@2889 | 226 | first_dirty_card = cur; break; |
ysr@2889 | 227 | } else { |
ysr@2889 | 228 | assert(!card_may_have_been_dirty(val), "Error"); |
ysr@2889 | 229 | } |
ysr@2889 | 230 | } |
ysr@2889 | 231 | if (first_dirty_card != NULL) { |
ysr@2889 | 232 | NOISY(tty->print_cr(" LNC: Found a dirty card at " PTR_FORMAT " in current chunk", |
ysr@2889 | 233 | first_dirty_card);) |
ysr@2889 | 234 | assert(0 <= cur_chunk_index && cur_chunk_index < lowest_non_clean_chunk_size, |
ysr@2889 | 235 | "Bounds error."); |
ysr@2889 | 236 | assert(lowest_non_clean[cur_chunk_index] == NULL, |
ysr@2889 | 237 | "Write exactly once : value should be stable hereafter for this round"); |
ysr@2889 | 238 | lowest_non_clean[cur_chunk_index] = first_dirty_card; |
ysr@2889 | 239 | } NOISY(else { |
ysr@2889 | 240 | tty->print_cr(" LNC: Found no dirty card in current chunk; leaving LNC entry NULL"); |
ysr@2889 | 241 | // In the future, we could have this thread look for a non-NULL value to copy from its |
ysr@2889 | 242 | // right neighbour (up to the end of the first object). |
ysr@2889 | 243 | if (last_card_of_cur_chunk < last_card_of_first_obj) { |
ysr@2889 | 244 | tty->print_cr(" LNC: BEWARE!!! first obj straddles past right end of chunk:\n" |
ysr@2889 | 245 | " might be efficient to get value from right neighbour?"); |
ysr@2889 | 246 | } |
ysr@2889 | 247 | }) |
ysr@2889 | 248 | } else { |
ysr@2889 | 249 | // In this case we can help our neighbour by just asking them |
ysr@2889 | 250 | // to stop at our first card (even though it may not be dirty). |
ysr@2889 | 251 | NOISY(tty->print_cr(" LNC: first block is not a non-array object; setting LNC to first card of current chunk");) |
ysr@2889 | 252 | assert(lowest_non_clean[cur_chunk_index] == NULL, "Write once : value should be stable hereafter"); |
ysr@2889 | 253 | jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start()); |
ysr@2889 | 254 | lowest_non_clean[cur_chunk_index] = first_card_of_cur_chunk; |
ysr@2889 | 255 | } |
ysr@2889 | 256 | NOISY(tty->print_cr(" process_chunk_boundary: lowest_non_clean[" INTPTR_FORMAT "] = " PTR_FORMAT |
ysr@2889 | 257 | " which corresponds to the heap address " PTR_FORMAT, |
ysr@2889 | 258 | cur_chunk_index, lowest_non_clean[cur_chunk_index], |
ysr@2889 | 259 | (lowest_non_clean[cur_chunk_index] != NULL) |
ysr@2889 | 260 | ? addr_for(lowest_non_clean[cur_chunk_index]) |
ysr@2889 | 261 | : NULL);) |
ysr@2889 | 262 | NOISY(tty->print_cr("---------------------------------------------------------------------------");) |
ysr@2889 | 263 | |
ysr@2889 | 264 | // Next, set our own max_to_do, which will strictly/exclusively bound |
ysr@2889 | 265 | // the highest address that we will scan past the right end of our chunk. |
ysr@2889 | 266 | HeapWord* max_to_do = NULL; |
duke@435 | 267 | if (chunk_mr.end() < used.end()) { |
ysr@2889 | 268 | // This is not the last chunk in the used region. |
ysr@2889 | 269 | // What is our last block? We check the first block of |
ysr@2889 | 270 | // the next (right) chunk rather than strictly check our last block |
ysr@2889 | 271 | // because it's potentially more efficient to do so. |
ysr@2889 | 272 | HeapWord* const last_block = sp->block_start(chunk_mr.end()); |
duke@435 | 273 | assert(last_block <= chunk_mr.end(), "In case this property changes."); |
ysr@2889 | 274 | if ((last_block == chunk_mr.end()) // our last block does not straddle boundary |
ysr@2889 | 275 | || !sp->block_is_obj(last_block) // last_block isn't an object |
ysr@2889 | 276 | || oop(last_block)->is_objArray() // last_block is an array (precisely marked) |
ysr@2889 | 277 | || oop(last_block)->is_typeArray()) { |
duke@435 | 278 | max_to_do = chunk_mr.end(); |
ysr@2889 | 279 | NOISY(tty->print_cr(" process_chunk_boundary: Last block on this card is not a non-array object;\n" |
ysr@2889 | 280 | " max_to_do left at " PTR_FORMAT, max_to_do);) |
duke@435 | 281 | } else { |
ysr@2889 | 282 | assert(last_block < chunk_mr.end(), "Tautology"); |
ysr@2889 | 283 | // It is a non-array object that straddles the right boundary of this chunk. |
duke@435 | 284 | // last_obj_card is the card corresponding to the start of the last object |
duke@435 | 285 | // in the chunk. Note that the last object may not start in |
duke@435 | 286 | // the chunk. |
ysr@2889 | 287 | jbyte* const last_obj_card = byte_for(last_block); |
ysr@2889 | 288 | const jbyte val = *last_obj_card; |
ysr@2889 | 289 | if (!card_will_be_scanned(val)) { |
ysr@2889 | 290 | assert(!card_may_have_been_dirty(val), "Error"); |
ysr@2889 | 291 | // The card containing the head is not dirty. Any marks on |
duke@435 | 292 | // subsequent cards still in this chunk must have been made |
ysr@2889 | 293 | // precisely; we can cap processing at the end of our chunk. |
duke@435 | 294 | max_to_do = chunk_mr.end(); |
ysr@2889 | 295 | NOISY(tty->print_cr(" process_chunk_boundary: Head of last object on this card is not dirty;\n" |
ysr@2889 | 296 | " max_to_do left at " PTR_FORMAT, |
ysr@2889 | 297 | max_to_do);) |
duke@435 | 298 | } else { |
duke@435 | 299 | // The last object must be considered dirty, and extends onto the |
duke@435 | 300 | // following chunk. Look for a dirty card in that chunk that will |
duke@435 | 301 | // bound our processing. |
duke@435 | 302 | jbyte* limit_card = NULL; |
ysr@2889 | 303 | const size_t last_block_size = sp->block_size(last_block); |
ysr@2889 | 304 | jbyte* const last_card_of_last_obj = |
duke@435 | 305 | byte_for(last_block + last_block_size - 1); |
ysr@2889 | 306 | jbyte* const first_card_of_next_chunk = byte_for(chunk_mr.end()); |
duke@435 | 307 | // This search potentially goes a long distance looking |
ysr@2889 | 308 | // for the next card that will be scanned, terminating |
ysr@2889 | 309 | // at the end of the last_block, if no earlier dirty card |
ysr@2889 | 310 | // is found. |
ysr@2889 | 311 | assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start()) == ParGCCardsPerStrideChunk, |
ysr@2889 | 312 | "last card of next chunk may be wrong"); |
duke@435 | 313 | for (jbyte* cur = first_card_of_next_chunk; |
ysr@2889 | 314 | cur <= last_card_of_last_obj; cur++) { |
ysr@2889 | 315 | const jbyte val = *cur; |
ysr@2889 | 316 | if (card_will_be_scanned(val)) { |
ysr@2889 | 317 | NOISY(tty->print_cr(" Found a non-clean card " PTR_FORMAT " with value 0x%x", |
ysr@2889 | 318 | cur, (int)val);) |
duke@435 | 319 | limit_card = cur; break; |
ysr@2889 | 320 | } else { |
ysr@2889 | 321 | assert(!card_may_have_been_dirty(val), "Error: card can't be skipped"); |
duke@435 | 322 | } |
duke@435 | 323 | } |
ysr@2889 | 324 | if (limit_card != NULL) { |
ysr@2889 | 325 | max_to_do = addr_for(limit_card); |
ysr@2889 | 326 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
ysr@2889 | 327 | NOISY(tty->print_cr(" process_chunk_boundary: Found a dirty card at " PTR_FORMAT |
ysr@2889 | 328 | " max_to_do set at " PTR_FORMAT " which is before end of last block in chunk: " |
ysr@2889 | 329 | PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT, |
ysr@2889 | 330 | limit_card, max_to_do, last_block, last_block_size, (last_block+last_block_size));) |
ysr@2889 | 331 | } else { |
ysr@2889 | 332 | // The following is a pessimistic value, because it's possible |
ysr@2889 | 333 | // that a dirty card on a subsequent chunk has been cleared by |
ysr@2889 | 334 | // the time we get to look at it; we'll correct for that further below, |
ysr@2889 | 335 | // using the LNC array which records the least non-clean card |
ysr@2889 | 336 | // before cards were cleared in a particular chunk. |
ysr@2889 | 337 | limit_card = last_card_of_last_obj; |
ysr@2889 | 338 | max_to_do = last_block + last_block_size; |
ysr@2889 | 339 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
ysr@2889 | 340 | NOISY(tty->print_cr(" process_chunk_boundary: Found no dirty card before end of last block in chunk\n" |
ysr@2889 | 341 | " Setting limit_card to " PTR_FORMAT |
ysr@2889 | 342 | " and max_to_do " PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT, |
ysr@2889 | 343 | limit_card, last_block, last_block_size, max_to_do);) |
ysr@2889 | 344 | } |
ysr@2889 | 345 | assert(0 < cur_chunk_index+1 && cur_chunk_index+1 < lowest_non_clean_chunk_size, |
duke@435 | 346 | "Bounds error."); |
ysr@2889 | 347 | // It is possible that a dirty card for the last object may have been |
ysr@2889 | 348 | // cleared before we had a chance to examine it. In that case, the value |
ysr@2889 | 349 | // will have been logged in the LNC for that chunk. |
ysr@2889 | 350 | // We need to examine as many chunks to the right as this object |
ysr@2889 | 351 | // covers. |
ysr@2889 | 352 | const uintptr_t last_chunk_index_to_check = addr_to_chunk_index(last_block + last_block_size - 1) |
ysr@2889 | 353 | - lowest_non_clean_base_chunk_index; |
ysr@2889 | 354 | DEBUG_ONLY(const uintptr_t last_chunk_index = addr_to_chunk_index(used.end()) |
ysr@2889 | 355 | - lowest_non_clean_base_chunk_index;) |
ysr@2889 | 356 | assert(last_chunk_index_to_check <= last_chunk_index, |
ysr@2889 | 357 | err_msg("Out of bounds: last_chunk_index_to_check " INTPTR_FORMAT |
ysr@2889 | 358 | " exceeds last_chunk_index " INTPTR_FORMAT, |
ysr@2889 | 359 | last_chunk_index_to_check, last_chunk_index)); |
ysr@2889 | 360 | for (uintptr_t lnc_index = cur_chunk_index + 1; |
ysr@2889 | 361 | lnc_index <= last_chunk_index_to_check; |
ysr@2889 | 362 | lnc_index++) { |
ysr@2889 | 363 | jbyte* lnc_card = lowest_non_clean[lnc_index]; |
ysr@2889 | 364 | if (lnc_card != NULL) { |
ysr@2889 | 365 | // we can stop at the first non-NULL entry we find |
ysr@2889 | 366 | if (lnc_card <= limit_card) { |
ysr@2889 | 367 | NOISY(tty->print_cr(" process_chunk_boundary: LNC card " PTR_FORMAT " is lower than limit_card " PTR_FORMAT, |
ysr@2889 | 368 | " max_to_do will be lowered to " PTR_FORMAT " from " PTR_FORMAT, |
ysr@2889 | 369 | lnc_card, limit_card, addr_for(lnc_card), max_to_do);) |
ysr@2889 | 370 | limit_card = lnc_card; |
ysr@2889 | 371 | max_to_do = addr_for(limit_card); |
ysr@2889 | 372 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
ysr@2889 | 373 | } |
ysr@2889 | 374 | // In any case, we break now |
ysr@2889 | 375 | break; |
ysr@2889 | 376 | } // else continue to look for a non-NULL entry if any |
duke@435 | 377 | } |
ysr@2889 | 378 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
duke@435 | 379 | } |
ysr@2889 | 380 | assert(max_to_do != NULL, "OOPS 1 !"); |
duke@435 | 381 | } |
ysr@2889 | 382 | assert(max_to_do != NULL, "OOPS 2!"); |
duke@435 | 383 | } else { |
duke@435 | 384 | max_to_do = used.end(); |
ysr@2889 | 385 | NOISY(tty->print_cr(" process_chunk_boundary: Last chunk of this space;\n" |
ysr@2889 | 386 | " max_to_do left at " PTR_FORMAT, |
ysr@2889 | 387 | max_to_do);) |
duke@435 | 388 | } |
ysr@2889 | 389 | assert(max_to_do != NULL, "OOPS 3!"); |
duke@435 | 390 | // Now we can set the closure we're using so it doesn't to beyond |
duke@435 | 391 | // max_to_do. |
duke@435 | 392 | dcto_cl->set_min_done(max_to_do); |
duke@435 | 393 | #ifndef PRODUCT |
duke@435 | 394 | dcto_cl->set_last_bottom(max_to_do); |
duke@435 | 395 | #endif |
ysr@2889 | 396 | NOISY(tty->print_cr("===========================================================================\n");) |
ysr@2889 | 397 | } |
duke@435 | 398 | |
ysr@2889 | 399 | #undef NOISY |
duke@435 | 400 | |
duke@435 | 401 | void |
duke@435 | 402 | CardTableModRefBS:: |
duke@435 | 403 | get_LNC_array_for_space(Space* sp, |
duke@435 | 404 | jbyte**& lowest_non_clean, |
duke@435 | 405 | uintptr_t& lowest_non_clean_base_chunk_index, |
duke@435 | 406 | size_t& lowest_non_clean_chunk_size) { |
duke@435 | 407 | |
duke@435 | 408 | int i = find_covering_region_containing(sp->bottom()); |
duke@435 | 409 | MemRegion covered = _covered[i]; |
duke@435 | 410 | size_t n_chunks = chunks_to_cover(covered); |
duke@435 | 411 | |
duke@435 | 412 | // Only the first thread to obtain the lock will resize the |
duke@435 | 413 | // LNC array for the covered region. Any later expansion can't affect |
duke@435 | 414 | // the used_at_save_marks region. |
duke@435 | 415 | // (I observed a bug in which the first thread to execute this would |
ysr@2889 | 416 | // resize, and then it would cause "expand_and_allocate" that would |
ysr@2889 | 417 | // increase the number of chunks in the covered region. Then a second |
duke@435 | 418 | // thread would come and execute this, see that the size didn't match, |
duke@435 | 419 | // and free and allocate again. So the first thread would be using a |
duke@435 | 420 | // freed "_lowest_non_clean" array.) |
duke@435 | 421 | |
duke@435 | 422 | // Do a dirty read here. If we pass the conditional then take the rare |
duke@435 | 423 | // event lock and do the read again in case some other thread had already |
duke@435 | 424 | // succeeded and done the resize. |
duke@435 | 425 | int cur_collection = Universe::heap()->total_collections(); |
duke@435 | 426 | if (_last_LNC_resizing_collection[i] != cur_collection) { |
duke@435 | 427 | MutexLocker x(ParGCRareEvent_lock); |
duke@435 | 428 | if (_last_LNC_resizing_collection[i] != cur_collection) { |
duke@435 | 429 | if (_lowest_non_clean[i] == NULL || |
duke@435 | 430 | n_chunks != _lowest_non_clean_chunk_size[i]) { |
duke@435 | 431 | |
duke@435 | 432 | // Should we delete the old? |
duke@435 | 433 | if (_lowest_non_clean[i] != NULL) { |
duke@435 | 434 | assert(n_chunks != _lowest_non_clean_chunk_size[i], |
duke@435 | 435 | "logical consequence"); |
duke@435 | 436 | FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i]); |
duke@435 | 437 | _lowest_non_clean[i] = NULL; |
duke@435 | 438 | } |
duke@435 | 439 | // Now allocate a new one if necessary. |
duke@435 | 440 | if (_lowest_non_clean[i] == NULL) { |
duke@435 | 441 | _lowest_non_clean[i] = NEW_C_HEAP_ARRAY(CardPtr, n_chunks); |
duke@435 | 442 | _lowest_non_clean_chunk_size[i] = n_chunks; |
duke@435 | 443 | _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start()); |
duke@435 | 444 | for (int j = 0; j < (int)n_chunks; j++) |
duke@435 | 445 | _lowest_non_clean[i][j] = NULL; |
duke@435 | 446 | } |
duke@435 | 447 | } |
duke@435 | 448 | _last_LNC_resizing_collection[i] = cur_collection; |
duke@435 | 449 | } |
duke@435 | 450 | } |
duke@435 | 451 | // In any case, now do the initialization. |
duke@435 | 452 | lowest_non_clean = _lowest_non_clean[i]; |
duke@435 | 453 | lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i]; |
duke@435 | 454 | lowest_non_clean_chunk_size = _lowest_non_clean_chunk_size[i]; |
duke@435 | 455 | } |