Tue, 08 Aug 2017 15:57:29 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. |
aoqi@0 | 8 | * |
aoqi@0 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 13 | * accompanied this code). |
aoqi@0 | 14 | * |
aoqi@0 | 15 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 18 | * |
aoqi@0 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 20 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 21 | * questions. |
aoqi@0 | 22 | * |
aoqi@0 | 23 | */ |
aoqi@0 | 24 | |
aoqi@0 | 25 | #include "precompiled.hpp" |
aoqi@0 | 26 | #include "memory/allocation.inline.hpp" |
aoqi@0 | 27 | #include "memory/cardTableModRefBS.hpp" |
aoqi@0 | 28 | #include "memory/cardTableRS.hpp" |
aoqi@0 | 29 | #include "memory/sharedHeap.hpp" |
aoqi@0 | 30 | #include "memory/space.inline.hpp" |
aoqi@0 | 31 | #include "memory/universe.hpp" |
aoqi@0 | 32 | #include "oops/oop.inline.hpp" |
aoqi@0 | 33 | #include "runtime/java.hpp" |
aoqi@0 | 34 | #include "runtime/mutexLocker.hpp" |
aoqi@0 | 35 | #include "runtime/virtualspace.hpp" |
aoqi@0 | 36 | #include "runtime/vmThread.hpp" |
aoqi@0 | 37 | |
aoqi@0 | 38 | PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC |
aoqi@0 | 39 | |
aoqi@0 | 40 | void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, |
aoqi@0 | 41 | OopsInGenClosure* cl, |
aoqi@0 | 42 | CardTableRS* ct, |
aoqi@0 | 43 | int n_threads) { |
aoqi@0 | 44 | assert(n_threads > 0, "Error: expected n_threads > 0"); |
aoqi@0 | 45 | assert((n_threads == 1 && ParallelGCThreads == 0) || |
aoqi@0 | 46 | n_threads <= (int)ParallelGCThreads, |
aoqi@0 | 47 | "# worker threads != # requested!"); |
aoqi@0 | 48 | assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread"); |
aoqi@0 | 49 | assert(UseDynamicNumberOfGCThreads || |
aoqi@0 | 50 | !FLAG_IS_DEFAULT(ParallelGCThreads) || |
aoqi@0 | 51 | n_threads == (int)ParallelGCThreads, |
aoqi@0 | 52 | "# worker threads != # requested!"); |
aoqi@0 | 53 | // Make sure the LNC array is valid for the space. |
aoqi@0 | 54 | jbyte** lowest_non_clean; |
aoqi@0 | 55 | uintptr_t lowest_non_clean_base_chunk_index; |
aoqi@0 | 56 | size_t lowest_non_clean_chunk_size; |
aoqi@0 | 57 | get_LNC_array_for_space(sp, lowest_non_clean, |
aoqi@0 | 58 | lowest_non_clean_base_chunk_index, |
aoqi@0 | 59 | lowest_non_clean_chunk_size); |
aoqi@0 | 60 | |
aoqi@0 | 61 | uint n_strides = n_threads * ParGCStridesPerThread; |
aoqi@0 | 62 | SequentialSubTasksDone* pst = sp->par_seq_tasks(); |
aoqi@0 | 63 | // Sets the condition for completion of the subtask (how many threads |
aoqi@0 | 64 | // need to finish in order to be done). |
aoqi@0 | 65 | pst->set_n_threads(n_threads); |
aoqi@0 | 66 | pst->set_n_tasks(n_strides); |
aoqi@0 | 67 | |
aoqi@0 | 68 | uint stride = 0; |
aoqi@0 | 69 | while (!pst->is_task_claimed(/* reference */ stride)) { |
aoqi@0 | 70 | process_stride(sp, mr, stride, n_strides, cl, ct, |
aoqi@0 | 71 | lowest_non_clean, |
aoqi@0 | 72 | lowest_non_clean_base_chunk_index, |
aoqi@0 | 73 | lowest_non_clean_chunk_size); |
aoqi@0 | 74 | } |
aoqi@0 | 75 | if (pst->all_tasks_completed()) { |
aoqi@0 | 76 | // Clear lowest_non_clean array for next time. |
aoqi@0 | 77 | intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); |
aoqi@0 | 78 | uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); |
aoqi@0 | 79 | for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { |
aoqi@0 | 80 | intptr_t ind = ch - lowest_non_clean_base_chunk_index; |
aoqi@0 | 81 | assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, |
aoqi@0 | 82 | "Bounds error"); |
aoqi@0 | 83 | lowest_non_clean[ind] = NULL; |
aoqi@0 | 84 | } |
aoqi@0 | 85 | } |
aoqi@0 | 86 | } |
aoqi@0 | 87 | |
aoqi@0 | 88 | void |
aoqi@0 | 89 | CardTableModRefBS:: |
aoqi@0 | 90 | process_stride(Space* sp, |
aoqi@0 | 91 | MemRegion used, |
aoqi@0 | 92 | jint stride, int n_strides, |
aoqi@0 | 93 | OopsInGenClosure* cl, |
aoqi@0 | 94 | CardTableRS* ct, |
aoqi@0 | 95 | jbyte** lowest_non_clean, |
aoqi@0 | 96 | uintptr_t lowest_non_clean_base_chunk_index, |
aoqi@0 | 97 | size_t lowest_non_clean_chunk_size) { |
aoqi@0 | 98 | // We go from higher to lower addresses here; it wouldn't help that much |
aoqi@0 | 99 | // because of the strided parallelism pattern used here. |
aoqi@0 | 100 | |
aoqi@0 | 101 | // Find the first card address of the first chunk in the stride that is |
aoqi@0 | 102 | // at least "bottom" of the used region. |
aoqi@0 | 103 | jbyte* start_card = byte_for(used.start()); |
aoqi@0 | 104 | jbyte* end_card = byte_after(used.last()); |
aoqi@0 | 105 | uintptr_t start_chunk = addr_to_chunk_index(used.start()); |
aoqi@0 | 106 | uintptr_t start_chunk_stride_num = start_chunk % n_strides; |
aoqi@0 | 107 | jbyte* chunk_card_start; |
aoqi@0 | 108 | |
aoqi@0 | 109 | if ((uintptr_t)stride >= start_chunk_stride_num) { |
aoqi@0 | 110 | chunk_card_start = (jbyte*)(start_card + |
aoqi@0 | 111 | (stride - start_chunk_stride_num) * |
aoqi@0 | 112 | ParGCCardsPerStrideChunk); |
aoqi@0 | 113 | } else { |
aoqi@0 | 114 | // Go ahead to the next chunk group boundary, then to the requested stride. |
aoqi@0 | 115 | chunk_card_start = (jbyte*)(start_card + |
aoqi@0 | 116 | (n_strides - start_chunk_stride_num + stride) * |
aoqi@0 | 117 | ParGCCardsPerStrideChunk); |
aoqi@0 | 118 | } |
aoqi@0 | 119 | |
aoqi@0 | 120 | while (chunk_card_start < end_card) { |
aoqi@0 | 121 | // Even though we go from lower to higher addresses below, the |
aoqi@0 | 122 | // strided parallelism can interleave the actual processing of the |
aoqi@0 | 123 | // dirty pages in various ways. For a specific chunk within this |
aoqi@0 | 124 | // stride, we take care to avoid double scanning or missing a card |
aoqi@0 | 125 | // by suitably initializing the "min_done" field in process_chunk_boundaries() |
aoqi@0 | 126 | // below, together with the dirty region extension accomplished in |
aoqi@0 | 127 | // DirtyCardToOopClosure::do_MemRegion(). |
aoqi@0 | 128 | jbyte* chunk_card_end = chunk_card_start + ParGCCardsPerStrideChunk; |
aoqi@0 | 129 | // Invariant: chunk_mr should be fully contained within the "used" region. |
aoqi@0 | 130 | MemRegion chunk_mr = MemRegion(addr_for(chunk_card_start), |
aoqi@0 | 131 | chunk_card_end >= end_card ? |
aoqi@0 | 132 | used.end() : addr_for(chunk_card_end)); |
aoqi@0 | 133 | assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)"); |
aoqi@0 | 134 | assert(used.contains(chunk_mr), "chunk_mr should be subset of used"); |
aoqi@0 | 135 | |
aoqi@0 | 136 | DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), |
aoqi@0 | 137 | cl->gen_boundary()); |
aoqi@0 | 138 | ClearNoncleanCardWrapper clear_cl(dcto_cl, ct); |
aoqi@0 | 139 | |
aoqi@0 | 140 | |
aoqi@0 | 141 | // Process the chunk. |
aoqi@0 | 142 | process_chunk_boundaries(sp, |
aoqi@0 | 143 | dcto_cl, |
aoqi@0 | 144 | chunk_mr, |
aoqi@0 | 145 | used, |
aoqi@0 | 146 | lowest_non_clean, |
aoqi@0 | 147 | lowest_non_clean_base_chunk_index, |
aoqi@0 | 148 | lowest_non_clean_chunk_size); |
aoqi@0 | 149 | |
aoqi@0 | 150 | // We want the LNC array updates above in process_chunk_boundaries |
aoqi@0 | 151 | // to be visible before any of the card table value changes as a |
aoqi@0 | 152 | // result of the dirty card iteration below. |
aoqi@0 | 153 | OrderAccess::storestore(); |
aoqi@0 | 154 | |
aoqi@0 | 155 | // We do not call the non_clean_card_iterate_serial() version because |
aoqi@0 | 156 | // we want to clear the cards: clear_cl here does the work of finding |
aoqi@0 | 157 | // contiguous dirty ranges of cards to process and clear. |
aoqi@0 | 158 | clear_cl.do_MemRegion(chunk_mr); |
aoqi@0 | 159 | |
aoqi@0 | 160 | // Find the next chunk of the stride. |
aoqi@0 | 161 | chunk_card_start += ParGCCardsPerStrideChunk * n_strides; |
aoqi@0 | 162 | } |
aoqi@0 | 163 | } |
aoqi@0 | 164 | |
aoqi@0 | 165 | |
aoqi@0 | 166 | // If you want a talkative process_chunk_boundaries, |
aoqi@0 | 167 | // then #define NOISY(x) x |
aoqi@0 | 168 | #ifdef NOISY |
aoqi@0 | 169 | #error "Encountered a global preprocessor flag, NOISY, which might clash with local definition to follow" |
aoqi@0 | 170 | #else |
aoqi@0 | 171 | #define NOISY(x) |
aoqi@0 | 172 | #endif |
aoqi@0 | 173 | |
aoqi@0 | 174 | void |
aoqi@0 | 175 | CardTableModRefBS:: |
aoqi@0 | 176 | process_chunk_boundaries(Space* sp, |
aoqi@0 | 177 | DirtyCardToOopClosure* dcto_cl, |
aoqi@0 | 178 | MemRegion chunk_mr, |
aoqi@0 | 179 | MemRegion used, |
aoqi@0 | 180 | jbyte** lowest_non_clean, |
aoqi@0 | 181 | uintptr_t lowest_non_clean_base_chunk_index, |
aoqi@0 | 182 | size_t lowest_non_clean_chunk_size) |
aoqi@0 | 183 | { |
aoqi@0 | 184 | // We must worry about non-array objects that cross chunk boundaries, |
aoqi@0 | 185 | // because such objects are both precisely and imprecisely marked: |
aoqi@0 | 186 | // .. if the head of such an object is dirty, the entire object |
aoqi@0 | 187 | // needs to be scanned, under the interpretation that this |
aoqi@0 | 188 | // was an imprecise mark |
aoqi@0 | 189 | // .. if the head of such an object is not dirty, we can assume |
aoqi@0 | 190 | // precise marking and it's efficient to scan just the dirty |
aoqi@0 | 191 | // cards. |
aoqi@0 | 192 | // In either case, each scanned reference must be scanned precisely |
aoqi@0 | 193 | // once so as to avoid cloning of a young referent. For efficiency, |
aoqi@0 | 194 | // our closures depend on this property and do not protect against |
aoqi@0 | 195 | // double scans. |
aoqi@0 | 196 | |
aoqi@0 | 197 | uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start()); |
aoqi@0 | 198 | cur_chunk_index = cur_chunk_index - lowest_non_clean_base_chunk_index; |
aoqi@0 | 199 | |
aoqi@0 | 200 | NOISY(tty->print_cr("===========================================================================");) |
aoqi@0 | 201 | NOISY(tty->print_cr(" process_chunk_boundary: Called with [" PTR_FORMAT "," PTR_FORMAT ")", |
aoqi@0 | 202 | chunk_mr.start(), chunk_mr.end());) |
aoqi@0 | 203 | |
aoqi@0 | 204 | // First, set "our" lowest_non_clean entry, which would be |
aoqi@0 | 205 | // used by the thread scanning an adjoining left chunk with |
aoqi@0 | 206 | // a non-array object straddling the mutual boundary. |
aoqi@0 | 207 | // Find the object that spans our boundary, if one exists. |
aoqi@0 | 208 | // first_block is the block possibly straddling our left boundary. |
aoqi@0 | 209 | HeapWord* first_block = sp->block_start(chunk_mr.start()); |
aoqi@0 | 210 | assert((chunk_mr.start() != used.start()) || (first_block == chunk_mr.start()), |
aoqi@0 | 211 | "First chunk should always have a co-initial block"); |
aoqi@0 | 212 | // Does the block straddle the chunk's left boundary, and is it |
aoqi@0 | 213 | // a non-array object? |
aoqi@0 | 214 | if (first_block < chunk_mr.start() // first block straddles left bdry |
aoqi@0 | 215 | && sp->block_is_obj(first_block) // first block is an object |
aoqi@0 | 216 | && !(oop(first_block)->is_objArray() // first block is not an array (arrays are precisely dirtied) |
aoqi@0 | 217 | || oop(first_block)->is_typeArray())) { |
aoqi@0 | 218 | // Find our least non-clean card, so that a left neighbour |
aoqi@0 | 219 | // does not scan an object straddling the mutual boundary |
aoqi@0 | 220 | // too far to the right, and attempt to scan a portion of |
aoqi@0 | 221 | // that object twice. |
aoqi@0 | 222 | jbyte* first_dirty_card = NULL; |
aoqi@0 | 223 | jbyte* last_card_of_first_obj = |
aoqi@0 | 224 | byte_for(first_block + sp->block_size(first_block) - 1); |
aoqi@0 | 225 | jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start()); |
aoqi@0 | 226 | jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last()); |
aoqi@0 | 227 | jbyte* last_card_to_check = |
aoqi@0 | 228 | (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk, |
aoqi@0 | 229 | (intptr_t) last_card_of_first_obj); |
aoqi@0 | 230 | // Note that this does not need to go beyond our last card |
aoqi@0 | 231 | // if our first object completely straddles this chunk. |
aoqi@0 | 232 | for (jbyte* cur = first_card_of_cur_chunk; |
aoqi@0 | 233 | cur <= last_card_to_check; cur++) { |
aoqi@0 | 234 | jbyte val = *cur; |
aoqi@0 | 235 | if (card_will_be_scanned(val)) { |
aoqi@0 | 236 | first_dirty_card = cur; break; |
aoqi@0 | 237 | } else { |
aoqi@0 | 238 | assert(!card_may_have_been_dirty(val), "Error"); |
aoqi@0 | 239 | } |
aoqi@0 | 240 | } |
aoqi@0 | 241 | if (first_dirty_card != NULL) { |
aoqi@0 | 242 | NOISY(tty->print_cr(" LNC: Found a dirty card at " PTR_FORMAT " in current chunk", |
aoqi@0 | 243 | first_dirty_card);) |
aoqi@0 | 244 | assert(0 <= cur_chunk_index && cur_chunk_index < lowest_non_clean_chunk_size, |
aoqi@0 | 245 | "Bounds error."); |
aoqi@0 | 246 | assert(lowest_non_clean[cur_chunk_index] == NULL, |
aoqi@0 | 247 | "Write exactly once : value should be stable hereafter for this round"); |
aoqi@0 | 248 | lowest_non_clean[cur_chunk_index] = first_dirty_card; |
aoqi@0 | 249 | } NOISY(else { |
aoqi@0 | 250 | tty->print_cr(" LNC: Found no dirty card in current chunk; leaving LNC entry NULL"); |
aoqi@0 | 251 | // In the future, we could have this thread look for a non-NULL value to copy from its |
aoqi@0 | 252 | // right neighbour (up to the end of the first object). |
aoqi@0 | 253 | if (last_card_of_cur_chunk < last_card_of_first_obj) { |
aoqi@0 | 254 | tty->print_cr(" LNC: BEWARE!!! first obj straddles past right end of chunk:\n" |
aoqi@0 | 255 | " might be efficient to get value from right neighbour?"); |
aoqi@0 | 256 | } |
aoqi@0 | 257 | }) |
aoqi@0 | 258 | } else { |
aoqi@0 | 259 | // In this case we can help our neighbour by just asking them |
aoqi@0 | 260 | // to stop at our first card (even though it may not be dirty). |
aoqi@0 | 261 | NOISY(tty->print_cr(" LNC: first block is not a non-array object; setting LNC to first card of current chunk");) |
aoqi@0 | 262 | assert(lowest_non_clean[cur_chunk_index] == NULL, "Write once : value should be stable hereafter"); |
aoqi@0 | 263 | jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start()); |
aoqi@0 | 264 | lowest_non_clean[cur_chunk_index] = first_card_of_cur_chunk; |
aoqi@0 | 265 | } |
aoqi@0 | 266 | NOISY(tty->print_cr(" process_chunk_boundary: lowest_non_clean[" INTPTR_FORMAT "] = " PTR_FORMAT |
aoqi@0 | 267 | " which corresponds to the heap address " PTR_FORMAT, |
aoqi@0 | 268 | cur_chunk_index, lowest_non_clean[cur_chunk_index], |
aoqi@0 | 269 | (lowest_non_clean[cur_chunk_index] != NULL) |
aoqi@0 | 270 | ? addr_for(lowest_non_clean[cur_chunk_index]) |
aoqi@0 | 271 | : NULL);) |
aoqi@0 | 272 | NOISY(tty->print_cr("---------------------------------------------------------------------------");) |
aoqi@0 | 273 | |
aoqi@0 | 274 | // Next, set our own max_to_do, which will strictly/exclusively bound |
aoqi@0 | 275 | // the highest address that we will scan past the right end of our chunk. |
aoqi@0 | 276 | HeapWord* max_to_do = NULL; |
aoqi@0 | 277 | if (chunk_mr.end() < used.end()) { |
aoqi@0 | 278 | // This is not the last chunk in the used region. |
aoqi@0 | 279 | // What is our last block? We check the first block of |
aoqi@0 | 280 | // the next (right) chunk rather than strictly check our last block |
aoqi@0 | 281 | // because it's potentially more efficient to do so. |
aoqi@0 | 282 | HeapWord* const last_block = sp->block_start(chunk_mr.end()); |
aoqi@0 | 283 | assert(last_block <= chunk_mr.end(), "In case this property changes."); |
aoqi@0 | 284 | if ((last_block == chunk_mr.end()) // our last block does not straddle boundary |
aoqi@0 | 285 | || !sp->block_is_obj(last_block) // last_block isn't an object |
aoqi@0 | 286 | || oop(last_block)->is_objArray() // last_block is an array (precisely marked) |
aoqi@0 | 287 | || oop(last_block)->is_typeArray()) { |
aoqi@0 | 288 | max_to_do = chunk_mr.end(); |
aoqi@0 | 289 | NOISY(tty->print_cr(" process_chunk_boundary: Last block on this card is not a non-array object;\n" |
aoqi@0 | 290 | " max_to_do left at " PTR_FORMAT, max_to_do);) |
aoqi@0 | 291 | } else { |
aoqi@0 | 292 | assert(last_block < chunk_mr.end(), "Tautology"); |
aoqi@0 | 293 | // It is a non-array object that straddles the right boundary of this chunk. |
aoqi@0 | 294 | // last_obj_card is the card corresponding to the start of the last object |
aoqi@0 | 295 | // in the chunk. Note that the last object may not start in |
aoqi@0 | 296 | // the chunk. |
aoqi@0 | 297 | jbyte* const last_obj_card = byte_for(last_block); |
aoqi@0 | 298 | const jbyte val = *last_obj_card; |
aoqi@0 | 299 | if (!card_will_be_scanned(val)) { |
aoqi@0 | 300 | assert(!card_may_have_been_dirty(val), "Error"); |
aoqi@0 | 301 | // The card containing the head is not dirty. Any marks on |
aoqi@0 | 302 | // subsequent cards still in this chunk must have been made |
aoqi@0 | 303 | // precisely; we can cap processing at the end of our chunk. |
aoqi@0 | 304 | max_to_do = chunk_mr.end(); |
aoqi@0 | 305 | NOISY(tty->print_cr(" process_chunk_boundary: Head of last object on this card is not dirty;\n" |
aoqi@0 | 306 | " max_to_do left at " PTR_FORMAT, |
aoqi@0 | 307 | max_to_do);) |
aoqi@0 | 308 | } else { |
aoqi@0 | 309 | // The last object must be considered dirty, and extends onto the |
aoqi@0 | 310 | // following chunk. Look for a dirty card in that chunk that will |
aoqi@0 | 311 | // bound our processing. |
aoqi@0 | 312 | jbyte* limit_card = NULL; |
aoqi@0 | 313 | const size_t last_block_size = sp->block_size(last_block); |
aoqi@0 | 314 | jbyte* const last_card_of_last_obj = |
aoqi@0 | 315 | byte_for(last_block + last_block_size - 1); |
aoqi@0 | 316 | jbyte* const first_card_of_next_chunk = byte_for(chunk_mr.end()); |
aoqi@0 | 317 | // This search potentially goes a long distance looking |
aoqi@0 | 318 | // for the next card that will be scanned, terminating |
aoqi@0 | 319 | // at the end of the last_block, if no earlier dirty card |
aoqi@0 | 320 | // is found. |
aoqi@0 | 321 | assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start()) == ParGCCardsPerStrideChunk, |
aoqi@0 | 322 | "last card of next chunk may be wrong"); |
aoqi@0 | 323 | for (jbyte* cur = first_card_of_next_chunk; |
aoqi@0 | 324 | cur <= last_card_of_last_obj; cur++) { |
aoqi@0 | 325 | const jbyte val = *cur; |
aoqi@0 | 326 | if (card_will_be_scanned(val)) { |
aoqi@0 | 327 | NOISY(tty->print_cr(" Found a non-clean card " PTR_FORMAT " with value 0x%x", |
aoqi@0 | 328 | cur, (int)val);) |
aoqi@0 | 329 | limit_card = cur; break; |
aoqi@0 | 330 | } else { |
aoqi@0 | 331 | assert(!card_may_have_been_dirty(val), "Error: card can't be skipped"); |
aoqi@0 | 332 | } |
aoqi@0 | 333 | } |
aoqi@0 | 334 | if (limit_card != NULL) { |
aoqi@0 | 335 | max_to_do = addr_for(limit_card); |
aoqi@0 | 336 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
aoqi@0 | 337 | NOISY(tty->print_cr(" process_chunk_boundary: Found a dirty card at " PTR_FORMAT |
aoqi@0 | 338 | " max_to_do set at " PTR_FORMAT " which is before end of last block in chunk: " |
aoqi@0 | 339 | PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT, |
aoqi@0 | 340 | limit_card, max_to_do, last_block, last_block_size, (last_block+last_block_size));) |
aoqi@0 | 341 | } else { |
aoqi@0 | 342 | // The following is a pessimistic value, because it's possible |
aoqi@0 | 343 | // that a dirty card on a subsequent chunk has been cleared by |
aoqi@0 | 344 | // the time we get to look at it; we'll correct for that further below, |
aoqi@0 | 345 | // using the LNC array which records the least non-clean card |
aoqi@0 | 346 | // before cards were cleared in a particular chunk. |
aoqi@0 | 347 | limit_card = last_card_of_last_obj; |
aoqi@0 | 348 | max_to_do = last_block + last_block_size; |
aoqi@0 | 349 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
aoqi@0 | 350 | NOISY(tty->print_cr(" process_chunk_boundary: Found no dirty card before end of last block in chunk\n" |
aoqi@0 | 351 | " Setting limit_card to " PTR_FORMAT |
aoqi@0 | 352 | " and max_to_do " PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT, |
aoqi@0 | 353 | limit_card, last_block, last_block_size, max_to_do);) |
aoqi@0 | 354 | } |
aoqi@0 | 355 | assert(0 < cur_chunk_index+1 && cur_chunk_index+1 < lowest_non_clean_chunk_size, |
aoqi@0 | 356 | "Bounds error."); |
aoqi@0 | 357 | // It is possible that a dirty card for the last object may have been |
aoqi@0 | 358 | // cleared before we had a chance to examine it. In that case, the value |
aoqi@0 | 359 | // will have been logged in the LNC for that chunk. |
aoqi@0 | 360 | // We need to examine as many chunks to the right as this object |
aoqi@0 | 361 | // covers. However, we need to bound this checking to the largest |
aoqi@0 | 362 | // entry in the LNC array: this is because the heap may expand |
aoqi@0 | 363 | // after the LNC array has been created but before we reach this point, |
aoqi@0 | 364 | // and the last block in our chunk may have been expanded to include |
aoqi@0 | 365 | // the expansion delta (and possibly subsequently allocated from, so |
aoqi@0 | 366 | // it wouldn't be sufficient to check whether that last block was |
aoqi@0 | 367 | // or was not an object at this point). |
aoqi@0 | 368 | uintptr_t last_chunk_index_to_check = addr_to_chunk_index(last_block + last_block_size - 1) |
aoqi@0 | 369 | - lowest_non_clean_base_chunk_index; |
aoqi@0 | 370 | const uintptr_t last_chunk_index = addr_to_chunk_index(used.last()) |
aoqi@0 | 371 | - lowest_non_clean_base_chunk_index; |
aoqi@0 | 372 | if (last_chunk_index_to_check > last_chunk_index) { |
aoqi@0 | 373 | assert(last_block + last_block_size > used.end(), |
aoqi@0 | 374 | err_msg("Inconsistency detected: last_block [" PTR_FORMAT "," PTR_FORMAT "]" |
aoqi@0 | 375 | " does not exceed used.end() = " PTR_FORMAT "," |
aoqi@0 | 376 | " yet last_chunk_index_to_check " INTPTR_FORMAT |
aoqi@0 | 377 | " exceeds last_chunk_index " INTPTR_FORMAT, |
aoqi@0 | 378 | last_block, last_block + last_block_size, |
aoqi@0 | 379 | used.end(), |
aoqi@0 | 380 | last_chunk_index_to_check, last_chunk_index)); |
aoqi@0 | 381 | assert(sp->used_region().end() > used.end(), |
aoqi@0 | 382 | err_msg("Expansion did not happen: " |
aoqi@0 | 383 | "[" PTR_FORMAT "," PTR_FORMAT ") -> [" PTR_FORMAT "," PTR_FORMAT ")", |
aoqi@0 | 384 | sp->used_region().start(), sp->used_region().end(), used.start(), used.end())); |
aoqi@0 | 385 | NOISY(tty->print_cr(" process_chunk_boundary: heap expanded; explicitly bounding last_chunk");) |
aoqi@0 | 386 | last_chunk_index_to_check = last_chunk_index; |
aoqi@0 | 387 | } |
aoqi@0 | 388 | for (uintptr_t lnc_index = cur_chunk_index + 1; |
aoqi@0 | 389 | lnc_index <= last_chunk_index_to_check; |
aoqi@0 | 390 | lnc_index++) { |
aoqi@0 | 391 | jbyte* lnc_card = lowest_non_clean[lnc_index]; |
aoqi@0 | 392 | if (lnc_card != NULL) { |
aoqi@0 | 393 | // we can stop at the first non-NULL entry we find |
aoqi@0 | 394 | if (lnc_card <= limit_card) { |
aoqi@0 | 395 | NOISY(tty->print_cr(" process_chunk_boundary: LNC card " PTR_FORMAT " is lower than limit_card " PTR_FORMAT, |
aoqi@0 | 396 | " max_to_do will be lowered to " PTR_FORMAT " from " PTR_FORMAT, |
aoqi@0 | 397 | lnc_card, limit_card, addr_for(lnc_card), max_to_do);) |
aoqi@0 | 398 | limit_card = lnc_card; |
aoqi@0 | 399 | max_to_do = addr_for(limit_card); |
aoqi@0 | 400 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
aoqi@0 | 401 | } |
aoqi@0 | 402 | // In any case, we break now |
aoqi@0 | 403 | break; |
aoqi@0 | 404 | } // else continue to look for a non-NULL entry if any |
aoqi@0 | 405 | } |
aoqi@0 | 406 | assert(limit_card != NULL && max_to_do != NULL, "Error"); |
aoqi@0 | 407 | } |
aoqi@0 | 408 | assert(max_to_do != NULL, "OOPS 1 !"); |
aoqi@0 | 409 | } |
aoqi@0 | 410 | assert(max_to_do != NULL, "OOPS 2!"); |
aoqi@0 | 411 | } else { |
aoqi@0 | 412 | max_to_do = used.end(); |
aoqi@0 | 413 | NOISY(tty->print_cr(" process_chunk_boundary: Last chunk of this space;\n" |
aoqi@0 | 414 | " max_to_do left at " PTR_FORMAT, |
aoqi@0 | 415 | max_to_do);) |
aoqi@0 | 416 | } |
aoqi@0 | 417 | assert(max_to_do != NULL, "OOPS 3!"); |
aoqi@0 | 418 | // Now we can set the closure we're using so it doesn't to beyond |
aoqi@0 | 419 | // max_to_do. |
aoqi@0 | 420 | dcto_cl->set_min_done(max_to_do); |
aoqi@0 | 421 | #ifndef PRODUCT |
aoqi@0 | 422 | dcto_cl->set_last_bottom(max_to_do); |
aoqi@0 | 423 | #endif |
aoqi@0 | 424 | NOISY(tty->print_cr("===========================================================================\n");) |
aoqi@0 | 425 | } |
aoqi@0 | 426 | |
aoqi@0 | 427 | #undef NOISY |
aoqi@0 | 428 | |
aoqi@0 | 429 | void |
aoqi@0 | 430 | CardTableModRefBS:: |
aoqi@0 | 431 | get_LNC_array_for_space(Space* sp, |
aoqi@0 | 432 | jbyte**& lowest_non_clean, |
aoqi@0 | 433 | uintptr_t& lowest_non_clean_base_chunk_index, |
aoqi@0 | 434 | size_t& lowest_non_clean_chunk_size) { |
aoqi@0 | 435 | |
aoqi@0 | 436 | int i = find_covering_region_containing(sp->bottom()); |
aoqi@0 | 437 | MemRegion covered = _covered[i]; |
aoqi@0 | 438 | size_t n_chunks = chunks_to_cover(covered); |
aoqi@0 | 439 | |
aoqi@0 | 440 | // Only the first thread to obtain the lock will resize the |
aoqi@0 | 441 | // LNC array for the covered region. Any later expansion can't affect |
aoqi@0 | 442 | // the used_at_save_marks region. |
aoqi@0 | 443 | // (I observed a bug in which the first thread to execute this would |
aoqi@0 | 444 | // resize, and then it would cause "expand_and_allocate" that would |
aoqi@0 | 445 | // increase the number of chunks in the covered region. Then a second |
aoqi@0 | 446 | // thread would come and execute this, see that the size didn't match, |
aoqi@0 | 447 | // and free and allocate again. So the first thread would be using a |
aoqi@0 | 448 | // freed "_lowest_non_clean" array.) |
aoqi@0 | 449 | |
aoqi@0 | 450 | // Do a dirty read here. If we pass the conditional then take the rare |
aoqi@0 | 451 | // event lock and do the read again in case some other thread had already |
aoqi@0 | 452 | // succeeded and done the resize. |
aoqi@0 | 453 | int cur_collection = Universe::heap()->total_collections(); |
aoqi@0 | 454 | if (_last_LNC_resizing_collection[i] != cur_collection) { |
aoqi@0 | 455 | MutexLocker x(ParGCRareEvent_lock); |
aoqi@0 | 456 | if (_last_LNC_resizing_collection[i] != cur_collection) { |
aoqi@0 | 457 | if (_lowest_non_clean[i] == NULL || |
aoqi@0 | 458 | n_chunks != _lowest_non_clean_chunk_size[i]) { |
aoqi@0 | 459 | |
aoqi@0 | 460 | // Should we delete the old? |
aoqi@0 | 461 | if (_lowest_non_clean[i] != NULL) { |
aoqi@0 | 462 | assert(n_chunks != _lowest_non_clean_chunk_size[i], |
aoqi@0 | 463 | "logical consequence"); |
aoqi@0 | 464 | FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i], mtGC); |
aoqi@0 | 465 | _lowest_non_clean[i] = NULL; |
aoqi@0 | 466 | } |
aoqi@0 | 467 | // Now allocate a new one if necessary. |
aoqi@0 | 468 | if (_lowest_non_clean[i] == NULL) { |
aoqi@0 | 469 | _lowest_non_clean[i] = NEW_C_HEAP_ARRAY(CardPtr, n_chunks, mtGC); |
aoqi@0 | 470 | _lowest_non_clean_chunk_size[i] = n_chunks; |
aoqi@0 | 471 | _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start()); |
aoqi@0 | 472 | for (int j = 0; j < (int)n_chunks; j++) |
aoqi@0 | 473 | _lowest_non_clean[i][j] = NULL; |
aoqi@0 | 474 | } |
aoqi@0 | 475 | } |
aoqi@0 | 476 | _last_LNC_resizing_collection[i] = cur_collection; |
aoqi@0 | 477 | } |
aoqi@0 | 478 | } |
aoqi@0 | 479 | // In any case, now do the initialization. |
aoqi@0 | 480 | lowest_non_clean = _lowest_non_clean[i]; |
aoqi@0 | 481 | lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i]; |
aoqi@0 | 482 | lowest_non_clean_chunk_size = _lowest_non_clean_chunk_size[i]; |
aoqi@0 | 483 | } |