1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,434 @@ 1.4 +/* 1.5 + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP 1.29 +#define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP 1.30 + 1.31 +#include "gc_implementation/g1/concurrentMark.hpp" 1.32 +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 1.33 + 1.34 +// Utility routine to set an exclusive range of cards on the given 1.35 +// card liveness bitmap 1.36 +inline void ConcurrentMark::set_card_bitmap_range(BitMap* card_bm, 1.37 + BitMap::idx_t start_idx, 1.38 + BitMap::idx_t end_idx, 1.39 + bool is_par) { 1.40 + 1.41 + // Set the exclusive bit range [start_idx, end_idx). 1.42 + assert((end_idx - start_idx) > 0, "at least one card"); 1.43 + assert(end_idx <= card_bm->size(), "sanity"); 1.44 + 1.45 + // Silently clip the end index 1.46 + end_idx = MIN2(end_idx, card_bm->size()); 1.47 + 1.48 + // For small ranges use a simple loop; otherwise use set_range or 1.49 + // use par_at_put_range (if parallel). The range is made up of the 1.50 + // cards that are spanned by an object/mem region so 8 cards will 1.51 + // allow up to object sizes up to 4K to be handled using the loop. 1.52 + if ((end_idx - start_idx) <= 8) { 1.53 + for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) { 1.54 + if (is_par) { 1.55 + card_bm->par_set_bit(i); 1.56 + } else { 1.57 + card_bm->set_bit(i); 1.58 + } 1.59 + } 1.60 + } else { 1.61 + // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive. 1.62 + if (is_par) { 1.63 + card_bm->par_at_put_range(start_idx, end_idx, true); 1.64 + } else { 1.65 + card_bm->set_range(start_idx, end_idx); 1.66 + } 1.67 + } 1.68 +} 1.69 + 1.70 +// Returns the index in the liveness accounting card bitmap 1.71 +// for the given address 1.72 +inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) { 1.73 + // Below, the term "card num" means the result of shifting an address 1.74 + // by the card shift -- address 0 corresponds to card number 0. One 1.75 + // must subtract the card num of the bottom of the heap to obtain a 1.76 + // card table index. 1.77 + intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift); 1.78 + return card_num - heap_bottom_card_num(); 1.79 +} 1.80 + 1.81 +// Counts the given memory region in the given task/worker 1.82 +// counting data structures. 1.83 +inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr, 1.84 + size_t* marked_bytes_array, 1.85 + BitMap* task_card_bm) { 1.86 + G1CollectedHeap* g1h = _g1h; 1.87 + CardTableModRefBS* ct_bs = g1h->g1_barrier_set(); 1.88 + 1.89 + HeapWord* start = mr.start(); 1.90 + HeapWord* end = mr.end(); 1.91 + size_t region_size_bytes = mr.byte_size(); 1.92 + uint index = hr->hrs_index(); 1.93 + 1.94 + assert(!hr->continuesHumongous(), "should not be HC region"); 1.95 + assert(hr == g1h->heap_region_containing(start), "sanity"); 1.96 + assert(hr == g1h->heap_region_containing(mr.last()), "sanity"); 1.97 + assert(marked_bytes_array != NULL, "pre-condition"); 1.98 + assert(task_card_bm != NULL, "pre-condition"); 1.99 + 1.100 + // Add to the task local marked bytes for this region. 1.101 + marked_bytes_array[index] += region_size_bytes; 1.102 + 1.103 + BitMap::idx_t start_idx = card_bitmap_index_for(start); 1.104 + BitMap::idx_t end_idx = card_bitmap_index_for(end); 1.105 + 1.106 + // Note: if we're looking at the last region in heap - end 1.107 + // could be actually just beyond the end of the heap; end_idx 1.108 + // will then correspond to a (non-existent) card that is also 1.109 + // just beyond the heap. 1.110 + if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) { 1.111 + // end of region is not card aligned - incremement to cover 1.112 + // all the cards spanned by the region. 1.113 + end_idx += 1; 1.114 + } 1.115 + // The card bitmap is task/worker specific => no need to use 1.116 + // the 'par' BitMap routines. 1.117 + // Set bits in the exclusive bit range [start_idx, end_idx). 1.118 + set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */); 1.119 +} 1.120 + 1.121 +// Counts the given memory region in the task/worker counting 1.122 +// data structures for the given worker id. 1.123 +inline void ConcurrentMark::count_region(MemRegion mr, 1.124 + HeapRegion* hr, 1.125 + uint worker_id) { 1.126 + size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id); 1.127 + BitMap* task_card_bm = count_card_bitmap_for(worker_id); 1.128 + count_region(mr, hr, marked_bytes_array, task_card_bm); 1.129 +} 1.130 + 1.131 +// Counts the given memory region, which may be a single object, in the 1.132 +// task/worker counting data structures for the given worker id. 1.133 +inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) { 1.134 + HeapWord* addr = mr.start(); 1.135 + HeapRegion* hr = _g1h->heap_region_containing_raw(addr); 1.136 + count_region(mr, hr, worker_id); 1.137 +} 1.138 + 1.139 +// Counts the given object in the given task/worker counting data structures. 1.140 +inline void ConcurrentMark::count_object(oop obj, 1.141 + HeapRegion* hr, 1.142 + size_t* marked_bytes_array, 1.143 + BitMap* task_card_bm) { 1.144 + MemRegion mr((HeapWord*)obj, obj->size()); 1.145 + count_region(mr, hr, marked_bytes_array, task_card_bm); 1.146 +} 1.147 + 1.148 +// Counts the given object in the task/worker counting data 1.149 +// structures for the given worker id. 1.150 +inline void ConcurrentMark::count_object(oop obj, 1.151 + HeapRegion* hr, 1.152 + uint worker_id) { 1.153 + size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id); 1.154 + BitMap* task_card_bm = count_card_bitmap_for(worker_id); 1.155 + HeapWord* addr = (HeapWord*) obj; 1.156 + count_object(obj, hr, marked_bytes_array, task_card_bm); 1.157 +} 1.158 + 1.159 +// Attempts to mark the given object and, if successful, counts 1.160 +// the object in the given task/worker counting structures. 1.161 +inline bool ConcurrentMark::par_mark_and_count(oop obj, 1.162 + HeapRegion* hr, 1.163 + size_t* marked_bytes_array, 1.164 + BitMap* task_card_bm) { 1.165 + HeapWord* addr = (HeapWord*)obj; 1.166 + if (_nextMarkBitMap->parMark(addr)) { 1.167 + // Update the task specific count data for the object. 1.168 + count_object(obj, hr, marked_bytes_array, task_card_bm); 1.169 + return true; 1.170 + } 1.171 + return false; 1.172 +} 1.173 + 1.174 +// Attempts to mark the given object and, if successful, counts 1.175 +// the object in the task/worker counting structures for the 1.176 +// given worker id. 1.177 +inline bool ConcurrentMark::par_mark_and_count(oop obj, 1.178 + size_t word_size, 1.179 + HeapRegion* hr, 1.180 + uint worker_id) { 1.181 + HeapWord* addr = (HeapWord*)obj; 1.182 + if (_nextMarkBitMap->parMark(addr)) { 1.183 + MemRegion mr(addr, word_size); 1.184 + count_region(mr, hr, worker_id); 1.185 + return true; 1.186 + } 1.187 + return false; 1.188 +} 1.189 + 1.190 +// Attempts to mark the given object and, if successful, counts 1.191 +// the object in the task/worker counting structures for the 1.192 +// given worker id. 1.193 +inline bool ConcurrentMark::par_mark_and_count(oop obj, 1.194 + HeapRegion* hr, 1.195 + uint worker_id) { 1.196 + HeapWord* addr = (HeapWord*)obj; 1.197 + if (_nextMarkBitMap->parMark(addr)) { 1.198 + // Update the task specific count data for the object. 1.199 + count_object(obj, hr, worker_id); 1.200 + return true; 1.201 + } 1.202 + return false; 1.203 +} 1.204 + 1.205 +// As above - but we don't know the heap region containing the 1.206 +// object and so have to supply it. 1.207 +inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) { 1.208 + HeapWord* addr = (HeapWord*)obj; 1.209 + HeapRegion* hr = _g1h->heap_region_containing_raw(addr); 1.210 + return par_mark_and_count(obj, hr, worker_id); 1.211 +} 1.212 + 1.213 +// Similar to the above routine but we already know the size, in words, of 1.214 +// the object that we wish to mark/count 1.215 +inline bool ConcurrentMark::par_mark_and_count(oop obj, 1.216 + size_t word_size, 1.217 + uint worker_id) { 1.218 + HeapWord* addr = (HeapWord*)obj; 1.219 + if (_nextMarkBitMap->parMark(addr)) { 1.220 + // Update the task specific count data for the object. 1.221 + MemRegion mr(addr, word_size); 1.222 + count_region(mr, worker_id); 1.223 + return true; 1.224 + } 1.225 + return false; 1.226 +} 1.227 + 1.228 +// Unconditionally mark the given object, and unconditinally count 1.229 +// the object in the counting structures for worker id 0. 1.230 +// Should *not* be called from parallel code. 1.231 +inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) { 1.232 + HeapWord* addr = (HeapWord*)obj; 1.233 + _nextMarkBitMap->mark(addr); 1.234 + // Update the task specific count data for the object. 1.235 + count_object(obj, hr, 0 /* worker_id */); 1.236 + return true; 1.237 +} 1.238 + 1.239 +// As above - but we don't have the heap region containing the 1.240 +// object, so we have to supply it. 1.241 +inline bool ConcurrentMark::mark_and_count(oop obj) { 1.242 + HeapWord* addr = (HeapWord*)obj; 1.243 + HeapRegion* hr = _g1h->heap_region_containing_raw(addr); 1.244 + return mark_and_count(obj, hr); 1.245 +} 1.246 + 1.247 +inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 1.248 + HeapWord* start_addr = MAX2(startWord(), mr.start()); 1.249 + HeapWord* end_addr = MIN2(endWord(), mr.end()); 1.250 + 1.251 + if (end_addr > start_addr) { 1.252 + // Right-open interval [start-offset, end-offset). 1.253 + BitMap::idx_t start_offset = heapWordToOffset(start_addr); 1.254 + BitMap::idx_t end_offset = heapWordToOffset(end_addr); 1.255 + 1.256 + start_offset = _bm.get_next_one_offset(start_offset, end_offset); 1.257 + while (start_offset < end_offset) { 1.258 + if (!cl->do_bit(start_offset)) { 1.259 + return false; 1.260 + } 1.261 + HeapWord* next_addr = MIN2(nextObject(offsetToHeapWord(start_offset)), end_addr); 1.262 + BitMap::idx_t next_offset = heapWordToOffset(next_addr); 1.263 + start_offset = _bm.get_next_one_offset(next_offset, end_offset); 1.264 + } 1.265 + } 1.266 + return true; 1.267 +} 1.268 + 1.269 +inline bool CMBitMapRO::iterate(BitMapClosure* cl) { 1.270 + MemRegion mr(startWord(), sizeInWords()); 1.271 + return iterate(cl, mr); 1.272 +} 1.273 + 1.274 +inline void CMTask::push(oop obj) { 1.275 + HeapWord* objAddr = (HeapWord*) obj; 1.276 + assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); 1.277 + assert(!_g1h->is_on_master_free_list( 1.278 + _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant"); 1.279 + assert(!_g1h->is_obj_ill(obj), "invariant"); 1.280 + assert(_nextMarkBitMap->isMarked(objAddr), "invariant"); 1.281 + 1.282 + if (_cm->verbose_high()) { 1.283 + gclog_or_tty->print_cr("[%u] pushing " PTR_FORMAT, _worker_id, p2i((void*) obj)); 1.284 + } 1.285 + 1.286 + if (!_task_queue->push(obj)) { 1.287 + // The local task queue looks full. We need to push some entries 1.288 + // to the global stack. 1.289 + 1.290 + if (_cm->verbose_medium()) { 1.291 + gclog_or_tty->print_cr("[%u] task queue overflow, " 1.292 + "moving entries to the global stack", 1.293 + _worker_id); 1.294 + } 1.295 + move_entries_to_global_stack(); 1.296 + 1.297 + // this should succeed since, even if we overflow the global 1.298 + // stack, we should have definitely removed some entries from the 1.299 + // local queue. So, there must be space on it. 1.300 + bool success = _task_queue->push(obj); 1.301 + assert(success, "invariant"); 1.302 + } 1.303 + 1.304 + statsOnly( int tmp_size = _task_queue->size(); 1.305 + if (tmp_size > _local_max_size) { 1.306 + _local_max_size = tmp_size; 1.307 + } 1.308 + ++_local_pushes ); 1.309 +} 1.310 + 1.311 +// This determines whether the method below will check both the local 1.312 +// and global fingers when determining whether to push on the stack a 1.313 +// gray object (value 1) or whether it will only check the global one 1.314 +// (value 0). The tradeoffs are that the former will be a bit more 1.315 +// accurate and possibly push less on the stack, but it might also be 1.316 +// a little bit slower. 1.317 + 1.318 +#define _CHECK_BOTH_FINGERS_ 1 1.319 + 1.320 +inline void CMTask::deal_with_reference(oop obj) { 1.321 + if (_cm->verbose_high()) { 1.322 + gclog_or_tty->print_cr("[%u] we're dealing with reference = "PTR_FORMAT, 1.323 + _worker_id, p2i((void*) obj)); 1.324 + } 1.325 + 1.326 + ++_refs_reached; 1.327 + 1.328 + HeapWord* objAddr = (HeapWord*) obj; 1.329 + assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 1.330 + if (_g1h->is_in_g1_reserved(objAddr)) { 1.331 + assert(obj != NULL, "null check is implicit"); 1.332 + if (!_nextMarkBitMap->isMarked(objAddr)) { 1.333 + // Only get the containing region if the object is not marked on the 1.334 + // bitmap (otherwise, it's a waste of time since we won't do 1.335 + // anything with it). 1.336 + HeapRegion* hr = _g1h->heap_region_containing_raw(obj); 1.337 + if (!hr->obj_allocated_since_next_marking(obj)) { 1.338 + if (_cm->verbose_high()) { 1.339 + gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked", 1.340 + _worker_id, p2i((void*) obj)); 1.341 + } 1.342 + 1.343 + // we need to mark it first 1.344 + if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { 1.345 + // No OrderAccess:store_load() is needed. It is implicit in the 1.346 + // CAS done in CMBitMap::parMark() call in the routine above. 1.347 + HeapWord* global_finger = _cm->finger(); 1.348 + 1.349 +#if _CHECK_BOTH_FINGERS_ 1.350 + // we will check both the local and global fingers 1.351 + 1.352 + if (_finger != NULL && objAddr < _finger) { 1.353 + if (_cm->verbose_high()) { 1.354 + gclog_or_tty->print_cr("[%u] below the local finger ("PTR_FORMAT"), " 1.355 + "pushing it", _worker_id, p2i(_finger)); 1.356 + } 1.357 + push(obj); 1.358 + } else if (_curr_region != NULL && objAddr < _region_limit) { 1.359 + // do nothing 1.360 + } else if (objAddr < global_finger) { 1.361 + // Notice that the global finger might be moving forward 1.362 + // concurrently. This is not a problem. In the worst case, we 1.363 + // mark the object while it is above the global finger and, by 1.364 + // the time we read the global finger, it has moved forward 1.365 + // passed this object. In this case, the object will probably 1.366 + // be visited when a task is scanning the region and will also 1.367 + // be pushed on the stack. So, some duplicate work, but no 1.368 + // correctness problems. 1.369 + 1.370 + if (_cm->verbose_high()) { 1.371 + gclog_or_tty->print_cr("[%u] below the global finger " 1.372 + "("PTR_FORMAT"), pushing it", 1.373 + _worker_id, p2i(global_finger)); 1.374 + } 1.375 + push(obj); 1.376 + } else { 1.377 + // do nothing 1.378 + } 1.379 +#else // _CHECK_BOTH_FINGERS_ 1.380 + // we will only check the global finger 1.381 + 1.382 + if (objAddr < global_finger) { 1.383 + // see long comment above 1.384 + 1.385 + if (_cm->verbose_high()) { 1.386 + gclog_or_tty->print_cr("[%u] below the global finger " 1.387 + "("PTR_FORMAT"), pushing it", 1.388 + _worker_id, p2i(global_finger)); 1.389 + } 1.390 + push(obj); 1.391 + } 1.392 +#endif // _CHECK_BOTH_FINGERS_ 1.393 + } 1.394 + } 1.395 + } 1.396 + } 1.397 +} 1.398 + 1.399 +inline void ConcurrentMark::markPrev(oop p) { 1.400 + assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity"); 1.401 + // Note we are overriding the read-only view of the prev map here, via 1.402 + // the cast. 1.403 + ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p); 1.404 +} 1.405 + 1.406 +inline void ConcurrentMark::grayRoot(oop obj, size_t word_size, 1.407 + uint worker_id, HeapRegion* hr) { 1.408 + assert(obj != NULL, "pre-condition"); 1.409 + HeapWord* addr = (HeapWord*) obj; 1.410 + if (hr == NULL) { 1.411 + hr = _g1h->heap_region_containing_raw(addr); 1.412 + } else { 1.413 + assert(hr->is_in(addr), "pre-condition"); 1.414 + } 1.415 + assert(hr != NULL, "sanity"); 1.416 + // Given that we're looking for a region that contains an object 1.417 + // header it's impossible to get back a HC region. 1.418 + assert(!hr->continuesHumongous(), "sanity"); 1.419 + 1.420 + // We cannot assert that word_size == obj->size() given that obj 1.421 + // might not be in a consistent state (another thread might be in 1.422 + // the process of copying it). So the best thing we can do is to 1.423 + // assert that word_size is under an upper bound which is its 1.424 + // containing region's capacity. 1.425 + assert(word_size * HeapWordSize <= hr->capacity(), 1.426 + err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT, 1.427 + word_size * HeapWordSize, hr->capacity(), 1.428 + HR_FORMAT_PARAMS(hr))); 1.429 + 1.430 + if (addr < hr->next_top_at_mark_start()) { 1.431 + if (!_nextMarkBitMap->isMarked(addr)) { 1.432 + par_mark_and_count(obj, word_size, hr, worker_id); 1.433 + } 1.434 + } 1.435 +} 1.436 + 1.437 +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP