Fri, 13 Jan 2012 13:27:48 -0800
7121547: G1: High number mispredicted branches while iterating over the marking bitmap
Summary: There is a high number of mispredicted branches associated with calling BitMap::iteratate() from within CMBitMapRO::iterate(). Implement a version of CMBitMapRO::iterate() directly using inline-able routines.
Reviewed-by: tonyp, iveresov
1.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 13 01:55:22 2012 -0800 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 13 13:27:48 2012 -0800 1.3 @@ -104,17 +104,6 @@ 1.4 return (int) (diff >> _shifter); 1.5 } 1.6 1.7 -bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 1.8 - HeapWord* left = MAX2(_bmStartWord, mr.start()); 1.9 - HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); 1.10 - if (right > left) { 1.11 - // Right-open interval [leftOffset, rightOffset). 1.12 - return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); 1.13 - } else { 1.14 - return true; 1.15 - } 1.16 -} 1.17 - 1.18 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, 1.19 size_t from_start_index, 1.20 HeapWord* to_start_word,
2.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Jan 13 01:55:22 2012 -0800 2.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Jan 13 13:27:48 2012 -0800 2.3 @@ -84,8 +84,8 @@ 2.4 } 2.5 2.6 // iteration 2.7 - bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); } 2.8 - bool iterate(BitMapClosure* cl, MemRegion mr); 2.9 + inline bool iterate(BitMapClosure* cl, MemRegion mr); 2.10 + inline bool iterate(BitMapClosure* cl); 2.11 2.12 // Return the address corresponding to the next marked bit at or after 2.13 // "addr", and before "limit", if "limit" is non-NULL. If there is no
3.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp Fri Jan 13 01:55:22 2012 -0800 3.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp Fri Jan 13 13:27:48 2012 -0800 3.3 @@ -28,6 +28,35 @@ 3.4 #include "gc_implementation/g1/concurrentMark.hpp" 3.5 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 3.6 3.7 +inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 3.8 + HeapWord* start_addr = MAX2(startWord(), mr.start()); 3.9 + HeapWord* end_addr = MIN2(endWord(), mr.end()); 3.10 + 3.11 + if (end_addr > start_addr) { 3.12 + // Right-open interval [start-offset, end-offset). 3.13 + BitMap::idx_t start_offset = heapWordToOffset(start_addr); 3.14 + BitMap::idx_t end_offset = heapWordToOffset(end_addr); 3.15 + 3.16 + start_offset = _bm.get_next_one_offset(start_offset, end_offset); 3.17 + while (start_offset < end_offset) { 3.18 + HeapWord* obj_addr = offsetToHeapWord(start_offset); 3.19 + oop obj = (oop) obj_addr; 3.20 + if (!cl->do_bit(start_offset)) { 3.21 + return false; 3.22 + } 3.23 + HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr); 3.24 + BitMap::idx_t next_offset = heapWordToOffset(next_addr); 3.25 + start_offset = _bm.get_next_one_offset(next_offset, end_offset); 3.26 + } 3.27 + } 3.28 + return true; 3.29 +} 3.30 + 3.31 +inline bool CMBitMapRO::iterate(BitMapClosure* cl) { 3.32 + MemRegion mr(startWord(), sizeInWords()); 3.33 + return iterate(cl, mr); 3.34 +} 3.35 + 3.36 inline void CMTask::push(oop obj) { 3.37 HeapWord* objAddr = (HeapWord*) obj; 3.38 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
4.1 --- a/src/share/vm/utilities/bitMap.inline.hpp Fri Jan 13 01:55:22 2012 -0800 4.2 +++ b/src/share/vm/utilities/bitMap.inline.hpp Fri Jan 13 13:27:48 2012 -0800 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 4.6 + * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. 4.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.8 * 4.9 * This code is free software; you can redistribute it and/or modify it 4.10 @@ -178,8 +178,30 @@ 4.11 for (; !(res & 1); res_offset++) { 4.12 res = res >> 1; 4.13 } 4.14 - assert(res_offset >= l_offset && 4.15 - res_offset < r_offset, "just checking"); 4.16 + 4.17 +#ifdef ASSERT 4.18 + // In the following assert, if r_offset is not bitamp word aligned, 4.19 + // checking that res_offset is strictly less than r_offset is too 4.20 + // strong and will trip the assert. 4.21 + // 4.22 + // Consider the case where l_offset is bit 15 and r_offset is bit 17 4.23 + // of the same map word, and where bits [15:16:17:18] == [00:00:00:01]. 4.24 + // All the bits in the range [l_offset:r_offset) are 0. 4.25 + // The loop that calculates res_offset, above, would yield the offset 4.26 + // of bit 18 because it's in the same map word as l_offset and there 4.27 + // is a set bit in that map word above l_offset (i.e. res != NoBits). 4.28 + // 4.29 + // In this case, however, we can assert is that res_offset is strictly 4.30 + // less than size() since we know that there is at least one set bit 4.31 + // at an offset above, but in the same map word as, r_offset. 4.32 + // Otherwise, if r_offset is word aligned then it will not be in the 4.33 + // same map word as l_offset (unless it equals l_offset). So either 4.34 + // there won't be a set bit between l_offset and the end of it's map 4.35 + // word (i.e. res == NoBits), or res_offset will be less than r_offset. 4.36 + 4.37 + idx_t limit = is_word_aligned(r_offset) ? r_offset : size(); 4.38 + assert(res_offset >= l_offset && res_offset < limit, "just checking"); 4.39 +#endif // ASSERT 4.40 return MIN2(res_offset, r_offset); 4.41 } 4.42 // skip over all word length 0-bit runs