src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp

Fri, 10 Oct 2014 15:51:58 +0200

author
tschatzl
date
Fri, 10 Oct 2014 15:51:58 +0200
changeset 7257
e7d0505c8a30
parent 7094
9337d0e7ea4f
child 7535
7ae4e26cb1e0
child 7829
24c446b2460d
permissions
-rw-r--r--

8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks, because those benchmarks never touch that memory at all, so without the explicit initialization the operating system would not actually commit these pages. The fix is to skip the initialization whenever the initialization value of the data structures matches the default value of just committed memory (=0).
Reviewed-by: jwilhelm, brutisso

     1 /*
     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
    26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
    28 #include "gc_implementation/g1/concurrentMark.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    31 // Utility routine to set an exclusive range of cards on the given
    32 // card liveness bitmap
    33 inline void ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
    34                                                   BitMap::idx_t start_idx,
    35                                                   BitMap::idx_t end_idx,
    36                                                   bool is_par) {
    38   // Set the exclusive bit range [start_idx, end_idx).
    39   assert((end_idx - start_idx) > 0, "at least one card");
    40   assert(end_idx <= card_bm->size(), "sanity");
    42   // Silently clip the end index
    43   end_idx = MIN2(end_idx, card_bm->size());
    45   // For small ranges use a simple loop; otherwise use set_range or
    46   // use par_at_put_range (if parallel). The range is made up of the
    47   // cards that are spanned by an object/mem region so 8 cards will
    48   // allow up to object sizes up to 4K to be handled using the loop.
    49   if ((end_idx - start_idx) <= 8) {
    50     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
    51       if (is_par) {
    52         card_bm->par_set_bit(i);
    53       } else {
    54         card_bm->set_bit(i);
    55       }
    56     }
    57   } else {
    58     // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
    59     if (is_par) {
    60       card_bm->par_at_put_range(start_idx, end_idx, true);
    61     } else {
    62       card_bm->set_range(start_idx, end_idx);
    63     }
    64   }
    65 }
    67 // Returns the index in the liveness accounting card bitmap
    68 // for the given address
    69 inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
    70   // Below, the term "card num" means the result of shifting an address
    71   // by the card shift -- address 0 corresponds to card number 0.  One
    72   // must subtract the card num of the bottom of the heap to obtain a
    73   // card table index.
    74   intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
    75   return card_num - heap_bottom_card_num();
    76 }
    78 // Counts the given memory region in the given task/worker
    79 // counting data structures.
    80 inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
    81                                          size_t* marked_bytes_array,
    82                                          BitMap* task_card_bm) {
    83   G1CollectedHeap* g1h = _g1h;
    84   CardTableModRefBS* ct_bs = g1h->g1_barrier_set();
    86   HeapWord* start = mr.start();
    87   HeapWord* end = mr.end();
    88   size_t region_size_bytes = mr.byte_size();
    89   uint index = hr->hrm_index();
    91   assert(!hr->continuesHumongous(), "should not be HC region");
    92   assert(hr == g1h->heap_region_containing(start), "sanity");
    93   assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
    94   assert(marked_bytes_array != NULL, "pre-condition");
    95   assert(task_card_bm != NULL, "pre-condition");
    97   // Add to the task local marked bytes for this region.
    98   marked_bytes_array[index] += region_size_bytes;
   100   BitMap::idx_t start_idx = card_bitmap_index_for(start);
   101   BitMap::idx_t end_idx = card_bitmap_index_for(end);
   103   // Note: if we're looking at the last region in heap - end
   104   // could be actually just beyond the end of the heap; end_idx
   105   // will then correspond to a (non-existent) card that is also
   106   // just beyond the heap.
   107   if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) {
   108     // end of region is not card aligned - incremement to cover
   109     // all the cards spanned by the region.
   110     end_idx += 1;
   111   }
   112   // The card bitmap is task/worker specific => no need to use
   113   // the 'par' BitMap routines.
   114   // Set bits in the exclusive bit range [start_idx, end_idx).
   115   set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */);
   116 }
   118 // Counts the given memory region in the task/worker counting
   119 // data structures for the given worker id.
   120 inline void ConcurrentMark::count_region(MemRegion mr,
   121                                          HeapRegion* hr,
   122                                          uint worker_id) {
   123   size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
   124   BitMap* task_card_bm = count_card_bitmap_for(worker_id);
   125   count_region(mr, hr, marked_bytes_array, task_card_bm);
   126 }
   128 // Counts the given object in the given task/worker counting data structures.
   129 inline void ConcurrentMark::count_object(oop obj,
   130                                          HeapRegion* hr,
   131                                          size_t* marked_bytes_array,
   132                                          BitMap* task_card_bm) {
   133   MemRegion mr((HeapWord*)obj, obj->size());
   134   count_region(mr, hr, marked_bytes_array, task_card_bm);
   135 }
   137 // Attempts to mark the given object and, if successful, counts
   138 // the object in the given task/worker counting structures.
   139 inline bool ConcurrentMark::par_mark_and_count(oop obj,
   140                                                HeapRegion* hr,
   141                                                size_t* marked_bytes_array,
   142                                                BitMap* task_card_bm) {
   143   HeapWord* addr = (HeapWord*)obj;
   144   if (_nextMarkBitMap->parMark(addr)) {
   145     // Update the task specific count data for the object.
   146     count_object(obj, hr, marked_bytes_array, task_card_bm);
   147     return true;
   148   }
   149   return false;
   150 }
   152 // Attempts to mark the given object and, if successful, counts
   153 // the object in the task/worker counting structures for the
   154 // given worker id.
   155 inline bool ConcurrentMark::par_mark_and_count(oop obj,
   156                                                size_t word_size,
   157                                                HeapRegion* hr,
   158                                                uint worker_id) {
   159   HeapWord* addr = (HeapWord*)obj;
   160   if (_nextMarkBitMap->parMark(addr)) {
   161     MemRegion mr(addr, word_size);
   162     count_region(mr, hr, worker_id);
   163     return true;
   164   }
   165   return false;
   166 }
   168 inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
   169   HeapWord* start_addr = MAX2(startWord(), mr.start());
   170   HeapWord* end_addr = MIN2(endWord(), mr.end());
   172   if (end_addr > start_addr) {
   173     // Right-open interval [start-offset, end-offset).
   174     BitMap::idx_t start_offset = heapWordToOffset(start_addr);
   175     BitMap::idx_t end_offset = heapWordToOffset(end_addr);
   177     start_offset = _bm.get_next_one_offset(start_offset, end_offset);
   178     while (start_offset < end_offset) {
   179       if (!cl->do_bit(start_offset)) {
   180         return false;
   181       }
   182       HeapWord* next_addr = MIN2(nextObject(offsetToHeapWord(start_offset)), end_addr);
   183       BitMap::idx_t next_offset = heapWordToOffset(next_addr);
   184       start_offset = _bm.get_next_one_offset(next_offset, end_offset);
   185     }
   186   }
   187   return true;
   188 }
   190 inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
   191   MemRegion mr(startWord(), sizeInWords());
   192   return iterate(cl, mr);
   193 }
   195 #define check_mark(addr)                                                       \
   196   assert(_bmStartWord <= (addr) && (addr) < (_bmStartWord + _bmWordSize),      \
   197          "outside underlying space?");                                         \
   198   assert(G1CollectedHeap::heap()->is_in_exact(addr),                           \
   199          err_msg("Trying to access not available bitmap "PTR_FORMAT            \
   200                  " corresponding to "PTR_FORMAT" (%u)",                        \
   201                  p2i(this), p2i(addr), G1CollectedHeap::heap()->addr_to_region(addr)));
   203 inline void CMBitMap::mark(HeapWord* addr) {
   204   check_mark(addr);
   205   _bm.set_bit(heapWordToOffset(addr));
   206 }
   208 inline void CMBitMap::clear(HeapWord* addr) {
   209   check_mark(addr);
   210   _bm.clear_bit(heapWordToOffset(addr));
   211 }
   213 inline bool CMBitMap::parMark(HeapWord* addr) {
   214   check_mark(addr);
   215   return _bm.par_set_bit(heapWordToOffset(addr));
   216 }
   218 inline bool CMBitMap::parClear(HeapWord* addr) {
   219   check_mark(addr);
   220   return _bm.par_clear_bit(heapWordToOffset(addr));
   221 }
   223 #undef check_mark
   225 inline void CMTask::push(oop obj) {
   226   HeapWord* objAddr = (HeapWord*) obj;
   227   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
   228   assert(!_g1h->is_on_master_free_list(
   229               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
   230   assert(!_g1h->is_obj_ill(obj), "invariant");
   231   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
   233   if (_cm->verbose_high()) {
   234     gclog_or_tty->print_cr("[%u] pushing " PTR_FORMAT, _worker_id, p2i((void*) obj));
   235   }
   237   if (!_task_queue->push(obj)) {
   238     // The local task queue looks full. We need to push some entries
   239     // to the global stack.
   241     if (_cm->verbose_medium()) {
   242       gclog_or_tty->print_cr("[%u] task queue overflow, "
   243                              "moving entries to the global stack",
   244                              _worker_id);
   245     }
   246     move_entries_to_global_stack();
   248     // this should succeed since, even if we overflow the global
   249     // stack, we should have definitely removed some entries from the
   250     // local queue. So, there must be space on it.
   251     bool success = _task_queue->push(obj);
   252     assert(success, "invariant");
   253   }
   255   statsOnly( int tmp_size = _task_queue->size();
   256              if (tmp_size > _local_max_size) {
   257                _local_max_size = tmp_size;
   258              }
   259              ++_local_pushes );
   260 }
   262 // This determines whether the method below will check both the local
   263 // and global fingers when determining whether to push on the stack a
   264 // gray object (value 1) or whether it will only check the global one
   265 // (value 0). The tradeoffs are that the former will be a bit more
   266 // accurate and possibly push less on the stack, but it might also be
   267 // a little bit slower.
   269 #define _CHECK_BOTH_FINGERS_      1
   271 inline void CMTask::deal_with_reference(oop obj) {
   272   if (_cm->verbose_high()) {
   273     gclog_or_tty->print_cr("[%u] we're dealing with reference = "PTR_FORMAT,
   274                            _worker_id, p2i((void*) obj));
   275   }
   277   ++_refs_reached;
   279   HeapWord* objAddr = (HeapWord*) obj;
   280   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   281   if (_g1h->is_in_g1_reserved(objAddr)) {
   282     assert(obj != NULL, "null check is implicit");
   283     if (!_nextMarkBitMap->isMarked(objAddr)) {
   284       // Only get the containing region if the object is not marked on the
   285       // bitmap (otherwise, it's a waste of time since we won't do
   286       // anything with it).
   287       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
   288       if (!hr->obj_allocated_since_next_marking(obj)) {
   289         if (_cm->verbose_high()) {
   290           gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked",
   291                                  _worker_id, p2i((void*) obj));
   292         }
   294         // we need to mark it first
   295         if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
   296           // No OrderAccess:store_load() is needed. It is implicit in the
   297           // CAS done in CMBitMap::parMark() call in the routine above.
   298           HeapWord* global_finger = _cm->finger();
   300 #if _CHECK_BOTH_FINGERS_
   301           // we will check both the local and global fingers
   303           if (_finger != NULL && objAddr < _finger) {
   304             if (_cm->verbose_high()) {
   305               gclog_or_tty->print_cr("[%u] below the local finger ("PTR_FORMAT"), "
   306                                      "pushing it", _worker_id, p2i(_finger));
   307             }
   308             push(obj);
   309           } else if (_curr_region != NULL && objAddr < _region_limit) {
   310             // do nothing
   311           } else if (objAddr < global_finger) {
   312             // Notice that the global finger might be moving forward
   313             // concurrently. This is not a problem. In the worst case, we
   314             // mark the object while it is above the global finger and, by
   315             // the time we read the global finger, it has moved forward
   316             // passed this object. In this case, the object will probably
   317             // be visited when a task is scanning the region and will also
   318             // be pushed on the stack. So, some duplicate work, but no
   319             // correctness problems.
   321             if (_cm->verbose_high()) {
   322               gclog_or_tty->print_cr("[%u] below the global finger "
   323                                      "("PTR_FORMAT"), pushing it",
   324                                      _worker_id, p2i(global_finger));
   325             }
   326             push(obj);
   327           } else {
   328             // do nothing
   329           }
   330 #else // _CHECK_BOTH_FINGERS_
   331           // we will only check the global finger
   333           if (objAddr < global_finger) {
   334             // see long comment above
   336             if (_cm->verbose_high()) {
   337               gclog_or_tty->print_cr("[%u] below the global finger "
   338                                      "("PTR_FORMAT"), pushing it",
   339                                      _worker_id, p2i(global_finger));
   340             }
   341             push(obj);
   342           }
   343 #endif // _CHECK_BOTH_FINGERS_
   344         }
   345       }
   346     }
   347   }
   348 }
   350 inline void ConcurrentMark::markPrev(oop p) {
   351   assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
   352   // Note we are overriding the read-only view of the prev map here, via
   353   // the cast.
   354   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
   355 }
   357 inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
   358                                      uint worker_id, HeapRegion* hr) {
   359   assert(obj != NULL, "pre-condition");
   360   HeapWord* addr = (HeapWord*) obj;
   361   if (hr == NULL) {
   362     hr = _g1h->heap_region_containing_raw(addr);
   363   } else {
   364     assert(hr->is_in(addr), "pre-condition");
   365   }
   366   assert(hr != NULL, "sanity");
   367   // Given that we're looking for a region that contains an object
   368   // header it's impossible to get back a HC region.
   369   assert(!hr->continuesHumongous(), "sanity");
   371   // We cannot assert that word_size == obj->size() given that obj
   372   // might not be in a consistent state (another thread might be in
   373   // the process of copying it). So the best thing we can do is to
   374   // assert that word_size is under an upper bound which is its
   375   // containing region's capacity.
   376   assert(word_size * HeapWordSize <= hr->capacity(),
   377          err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
   378                  word_size * HeapWordSize, hr->capacity(),
   379                  HR_FORMAT_PARAMS(hr)));
   381   if (addr < hr->next_top_at_mark_start()) {
   382     if (!_nextMarkBitMap->isMarked(addr)) {
   383       par_mark_and_count(obj, word_size, hr, worker_id);
   384     }
   385   }
   386 }
   388 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP

mercurial