src/share/vm/gc_implementation/g1/concurrentMark.hpp

changeset 0
f90c822e73f8
child 6876
710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,1292 @@
     1.4 +/*
     1.5 + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
    1.29 +#define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
    1.30 +
    1.31 +#include "gc_implementation/g1/heapRegionSet.hpp"
    1.32 +#include "utilities/taskqueue.hpp"
    1.33 +
    1.34 +class G1CollectedHeap;
    1.35 +class CMTask;
    1.36 +typedef GenericTaskQueue<oop, mtGC>            CMTaskQueue;
    1.37 +typedef GenericTaskQueueSet<CMTaskQueue, mtGC> CMTaskQueueSet;
    1.38 +
    1.39 +// Closure used by CM during concurrent reference discovery
    1.40 +// and reference processing (during remarking) to determine
    1.41 +// if a particular object is alive. It is primarily used
    1.42 +// to determine if referents of discovered reference objects
    1.43 +// are alive. An instance is also embedded into the
    1.44 +// reference processor as the _is_alive_non_header field
class G1CMIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1;  // the heap whose liveness information is consulted
 public:
  G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }

  // Returns true iff obj is considered live by concurrent marking.
  // (Implementation lives in the .cpp file.)
  bool do_object_b(oop obj);
};
    1.52 +
    1.53 +// A generic CM bit map.  This is essentially a wrapper around the BitMap
    1.54 +// class, with one bit per (1<<_shifter) HeapWords.
    1.55 +
// Read-only view of the marking bitmap: queries, iteration, and
// address<->offset conversion only. CMBitMap (below) adds the mutators.
class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
 protected:
  HeapWord* _bmStartWord;      // base address of range covered by map
  size_t    _bmWordSize;       // map size (in #HeapWords covered)
  const int _shifter;          // map to char or bit
  VirtualSpace _virtual_space; // underlying the bit map
  BitMap    _bm;               // the bit map itself

 public:
  // constructor
  CMBitMapRO(int shifter);

  enum { do_yield = true };

  // inquiries
  HeapWord* startWord()   const { return _bmStartWord; }
  size_t    sizeInWords() const { return _bmWordSize;  }
  // the following is one past the last word in space
  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }

  // read marks

  // Returns true iff the bit for addr is set. addr must lie within the
  // covered range [_bmStartWord, _bmStartWord + _bmWordSize).
  bool isMarked(HeapWord* addr) const {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.at(heapWordToOffset(addr));
  }

  // iteration
  inline bool iterate(BitMapClosure* cl, MemRegion mr);
  inline bool iterate(BitMapClosure* cl);

  // Return the address corresponding to the next marked bit at or after
  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
                                     HeapWord* limit = NULL) const;
  // Return the address corresponding to the next unmarked bit at or after
  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
                                       HeapWord* limit = NULL) const;

  // conversion utilities

  // Maps a bit offset back to the heap address of the first word it covers.
  HeapWord* offsetToHeapWord(size_t offset) const {
    return _bmStartWord + (offset << _shifter);
  }
  // Maps a heap address to the offset of its bit in the map.
  size_t heapWordToOffset(HeapWord* addr) const {
    return pointer_delta(addr, _bmStartWord) >> _shifter;
  }
  int heapWordDiffToOffsetDiff(size_t diff) const;

  // The argument addr should be the start address of a valid object.
  // Returns the address just past that object, i.e. the start of the next
  // object; the assert checks the result is representable in the map.
  HeapWord* nextObject(HeapWord* addr) {
    oop obj = (oop) addr;
    HeapWord* res =  addr + obj->size();
    assert(offsetToHeapWord(heapWordToOffset(res)) == res, "sanity");
    return res;
  }

  void print_on_error(outputStream* st, const char* prefix) const;

  // debugging
  NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
};
   1.121 +
// Mutable marking bitmap: adds single-threaded and parallel-safe
// mark/clear operations on top of the read-only CMBitMapRO view.
class CMBitMap : public CMBitMapRO {

 public:
  // constructor
  CMBitMap(int shifter) :
    CMBitMapRO(shifter) {}

  // Allocates the back store for the marking bitmap
  bool allocate(ReservedSpace heap_rs);

  // write marks

  // Sets the bit for addr. Not safe for concurrent writers; use parMark
  // for that. addr must lie within the covered range.
  void mark(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    _bm.set_bit(heapWordToOffset(addr));
  }
  // Clears the bit for addr (single-writer counterpart of parClear).
  void clear(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    _bm.clear_bit(heapWordToOffset(addr));
  }
  // Parallel-safe mark via BitMap::par_set_bit. Returns par_set_bit's
  // result — presumably whether this call changed the bit; confirm
  // against BitMap's declaration in utilities/bitMap.hpp.
  bool parMark(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.par_set_bit(heapWordToOffset(addr));
  }
  // Parallel-safe clear via BitMap::par_clear_bit (see note on parMark).
  bool parClear(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.par_clear_bit(heapWordToOffset(addr));
  }
  void markRange(MemRegion mr);
  void clearAll();
  void clearRange(MemRegion mr);

  // Starting at the bit corresponding to "addr" (inclusive), find the next
  // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
  // the end of this run (stopping at "end_addr").  Return the MemRegion
  // covering from the start of the region corresponding to the first bit
  // of the run to the end of the region corresponding to the last bit of
  // the run.  If there is no "1" bit at or after "addr", return an empty
  // MemRegion.
  MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
};
   1.166 +
   1.167 +// Represents a marking stack used by ConcurrentMarking in the G1 collector.
   1.168 +class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   1.169 +  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
   1.170 +  ConcurrentMark* _cm;
   1.171 +  oop* _base;        // bottom of stack
   1.172 +  jint _index;       // one more than last occupied index
   1.173 +  jint _capacity;    // max #elements
   1.174 +  jint _saved_index; // value of _index saved at start of GC
   1.175 +  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
   1.176 +
   1.177 +  bool  _overflow;
   1.178 +  bool  _should_expand;
   1.179 +  DEBUG_ONLY(bool _drain_in_progress;)
   1.180 +  DEBUG_ONLY(bool _drain_in_progress_yields;)
   1.181 +
   1.182 + public:
   1.183 +  CMMarkStack(ConcurrentMark* cm);
   1.184 +  ~CMMarkStack();
   1.185 +
   1.186 +#ifndef PRODUCT
   1.187 +  jint max_depth() const {
   1.188 +    return _max_depth;
   1.189 +  }
   1.190 +#endif
   1.191 +
   1.192 +  bool allocate(size_t capacity);
   1.193 +
   1.194 +  oop pop() {
   1.195 +    if (!isEmpty()) {
   1.196 +      return _base[--_index] ;
   1.197 +    }
   1.198 +    return NULL;
   1.199 +  }
   1.200 +
   1.201 +  // If overflow happens, don't do the push, and record the overflow.
   1.202 +  // *Requires* that "ptr" is already marked.
   1.203 +  void push(oop ptr) {
   1.204 +    if (isFull()) {
   1.205 +      // Record overflow.
   1.206 +      _overflow = true;
   1.207 +      return;
   1.208 +    } else {
   1.209 +      _base[_index++] = ptr;
   1.210 +      NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
   1.211 +    }
   1.212 +  }
   1.213 +  // Non-block impl.  Note: concurrency is allowed only with other
   1.214 +  // "par_push" operations, not with "pop" or "drain".  We would need
   1.215 +  // parallel versions of them if such concurrency was desired.
   1.216 +  void par_push(oop ptr);
   1.217 +
   1.218 +  // Pushes the first "n" elements of "ptr_arr" on the stack.
   1.219 +  // Non-block impl.  Note: concurrency is allowed only with other
   1.220 +  // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
   1.221 +  void par_adjoin_arr(oop* ptr_arr, int n);
   1.222 +
   1.223 +  // Pushes the first "n" elements of "ptr_arr" on the stack.
   1.224 +  // Locking impl: concurrency is allowed only with
   1.225 +  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
   1.226 +  // locking strategy.
   1.227 +  void par_push_arr(oop* ptr_arr, int n);
   1.228 +
   1.229 +  // If returns false, the array was empty.  Otherwise, removes up to "max"
   1.230 +  // elements from the stack, and transfers them to "ptr_arr" in an
   1.231 +  // unspecified order.  The actual number transferred is given in "n" ("n
   1.232 +  // == 0" is deliberately redundant with the return value.)  Locking impl:
   1.233 +  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
   1.234 +  // operations, which use the same locking strategy.
   1.235 +  bool par_pop_arr(oop* ptr_arr, int max, int* n);
   1.236 +
   1.237 +  // Drain the mark stack, applying the given closure to all fields of
   1.238 +  // objects on the stack.  (That is, continue until the stack is empty,
   1.239 +  // even if closure applications add entries to the stack.)  The "bm"
   1.240 +  // argument, if non-null, may be used to verify that only marked objects
   1.241 +  // are on the mark stack.  If "yield_after" is "true", then the
   1.242 +  // concurrent marker performing the drain offers to yield after
   1.243 +  // processing each object.  If a yield occurs, stops the drain operation
   1.244 +  // and returns false.  Otherwise, returns true.
   1.245 +  template<class OopClosureClass>
   1.246 +  bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
   1.247 +
   1.248 +  bool isEmpty()    { return _index == 0; }
   1.249 +  bool isFull()     { return _index == _capacity; }
   1.250 +  int  maxElems()   { return _capacity; }
   1.251 +
   1.252 +  bool overflow() { return _overflow; }
   1.253 +  void clear_overflow() { _overflow = false; }
   1.254 +
   1.255 +  bool should_expand() const { return _should_expand; }
   1.256 +  void set_should_expand();
   1.257 +
   1.258 +  // Expand the stack, typically in response to an overflow condition
   1.259 +  void expand();
   1.260 +
   1.261 +  int  size() { return _index; }
   1.262 +
   1.263 +  void setEmpty()   { _index = 0; clear_overflow(); }
   1.264 +
   1.265 +  // Record the current index.
   1.266 +  void note_start_of_gc();
   1.267 +
   1.268 +  // Make sure that we have not added any entries to the stack during GC.
   1.269 +  void note_end_of_gc();
   1.270 +
   1.271 +  // iterate over the oops in the mark stack, up to the bound recorded via
   1.272 +  // the call above.
   1.273 +  void oops_do(OopClosure* f);
   1.274 +};
   1.275 +
// Test support: lets marking artificially force stack-overflow conditions.
// All state and behavior exist only in non-product builds; in product
// builds the methods are no-ops via PRODUCT_RETURN and should_force()
// always returns false.
class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
private:
#ifndef PRODUCT
  uintx _num_remaining;  // how many forced overflows are still to be triggered
  bool _force;           // whether forcing is currently enabled
#endif // !defined(PRODUCT)

public:
  void init() PRODUCT_RETURN;
  void update() PRODUCT_RETURN;
  bool should_force() PRODUCT_RETURN_( return false; );
};
   1.288 +
// this will enable a variety of different statistics per GC task
#define _MARKING_STATS_       0
// this will enable the higher verbose levels
#define _MARKING_VERBOSE_     0

#if _MARKING_STATS_
// statsOnly(stmt) executes stmt only when marking statistics are compiled in.
#define statsOnly(statement)  \
do {                          \
  statement ;                 \
} while (0)
#else // _MARKING_STATS_
// Statistics disabled: statsOnly(stmt) expands to an empty statement.
#define statsOnly(statement)  \
do {                          \
} while (0)
#endif // _MARKING_STATS_
   1.304 +
// Verbosity levels for concurrent-marking debug output, in increasing
// order of detail.
typedef enum {
  no_verbose  = 0,   // verbose turned off
  stats_verbose,     // only prints stats at the end of marking
  low_verbose,       // low verbose, mostly per region and per major event
  medium_verbose,    // a bit more detailed than low
  high_verbose       // per object verbose
} CMVerboseLevel;
   1.312 +
   1.313 +class YoungList;
   1.314 +
   1.315 +// Root Regions are regions that are not empty at the beginning of a
   1.316 +// marking cycle and which we might collect during an evacuation pause
   1.317 +// while the cycle is active. Given that, during evacuation pauses, we
   1.318 +// do not copy objects that are explicitly marked, what we have to do
   1.319 +// for the root regions is to scan them and mark all objects reachable
   1.320 +// from them. According to the SATB assumptions, we only need to visit
   1.321 +// each object once during marking. So, as long as we finish this scan
   1.322 +// before the next evacuation pause, we can copy the objects from the
   1.323 +// root regions without having to mark them or do anything else to them.
   1.324 +//
   1.325 +// Currently, we only support root region scanning once (at the start
   1.326 +// of the marking cycle) and the root regions are all the survivor
   1.327 +// regions populated during the initial-mark pause.
class CMRootRegions VALUE_OBJ_CLASS_SPEC {
private:
  YoungList*           _young_list;  // source of the survivor (root) regions
  ConcurrentMark*      _cm;

  volatile bool        _scan_in_progress;  // true while root regions are being scanned
  volatile bool        _should_abort;      // set to make claim_next() return NULL early
  HeapRegion* volatile _next_survivor;     // next survivor region to hand out

public:
  CMRootRegions();
  // We actually do most of the initialization in this method.
  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);

  // Reset the claiming / scanning of the root regions.
  void prepare_for_scan();

  // Forces get_next() to return NULL so that the iteration aborts early.
  void abort() { _should_abort = true; }

  // Return true if the CM thread is actively scanning root regions,
  // false otherwise.
  bool scan_in_progress() { return _scan_in_progress; }

  // Claim the next root region to scan atomically, or return NULL if
  // all have been claimed.
  HeapRegion* claim_next();

  // Flag that we're done with root region scanning and notify anyone
  // who's waiting on it. If aborted is false, assume that all regions
  // have been claimed.
  void scan_finished();

  // If CM threads are still scanning root regions, wait until they
  // are done. Return true if we had to wait, false otherwise.
  bool wait_until_scan_finished();
};
   1.365 +
   1.366 +class ConcurrentMarkThread;
   1.367 +
   1.368 +class ConcurrentMark: public CHeapObj<mtGC> {
   1.369 +  friend class CMMarkStack;
   1.370 +  friend class ConcurrentMarkThread;
   1.371 +  friend class CMTask;
   1.372 +  friend class CMBitMapClosure;
   1.373 +  friend class CMGlobalObjectClosure;
   1.374 +  friend class CMRemarkTask;
   1.375 +  friend class CMConcurrentMarkingTask;
   1.376 +  friend class G1ParNoteEndTask;
   1.377 +  friend class CalcLiveObjectsClosure;
   1.378 +  friend class G1CMRefProcTaskProxy;
   1.379 +  friend class G1CMRefProcTaskExecutor;
   1.380 +  friend class G1CMKeepAliveAndDrainClosure;
   1.381 +  friend class G1CMDrainMarkingStackClosure;
   1.382 +
   1.383 +protected:
   1.384 +  ConcurrentMarkThread* _cmThread;   // the thread doing the work
   1.385 +  G1CollectedHeap*      _g1h;        // the heap.
   1.386 +  uint                  _parallel_marking_threads; // the number of marking
    1.387 +                                                   // threads we're using
   1.388 +  uint                  _max_parallel_marking_threads; // max number of marking
   1.389 +                                                   // threads we'll ever use
   1.390 +  double                _sleep_factor; // how much we have to sleep, with
   1.391 +                                       // respect to the work we just did, to
   1.392 +                                       // meet the marking overhead goal
   1.393 +  double                _marking_task_overhead; // marking target overhead for
   1.394 +                                                // a single task
   1.395 +
   1.396 +  // same as the two above, but for the cleanup task
   1.397 +  double                _cleanup_sleep_factor;
   1.398 +  double                _cleanup_task_overhead;
   1.399 +
   1.400 +  FreeRegionList        _cleanup_list;
   1.401 +
   1.402 +  // Concurrent marking support structures
   1.403 +  CMBitMap                _markBitMap1;
   1.404 +  CMBitMap                _markBitMap2;
   1.405 +  CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
   1.406 +  CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
   1.407 +
   1.408 +  BitMap                  _region_bm;
   1.409 +  BitMap                  _card_bm;
   1.410 +
   1.411 +  // Heap bounds
   1.412 +  HeapWord*               _heap_start;
   1.413 +  HeapWord*               _heap_end;
   1.414 +
   1.415 +  // Root region tracking and claiming.
   1.416 +  CMRootRegions           _root_regions;
   1.417 +
   1.418 +  // For gray objects
   1.419 +  CMMarkStack             _markStack; // Grey objects behind global finger.
   1.420 +  HeapWord* volatile      _finger;  // the global finger, region aligned,
   1.421 +                                    // always points to the end of the
   1.422 +                                    // last claimed region
   1.423 +
   1.424 +  // marking tasks
   1.425 +  uint                    _max_worker_id;// maximum worker id
   1.426 +  uint                    _active_tasks; // task num currently active
   1.427 +  CMTask**                _tasks;        // task queue array (max_worker_id len)
   1.428 +  CMTaskQueueSet*         _task_queues;  // task queue set
   1.429 +  ParallelTaskTerminator  _terminator;   // for termination
   1.430 +
   1.431 +  // Two sync barriers that are used to synchronise tasks when an
   1.432 +  // overflow occurs. The algorithm is the following. All tasks enter
   1.433 +  // the first one to ensure that they have all stopped manipulating
   1.434 +  // the global data structures. After they exit it, they re-initialise
   1.435 +  // their data structures and task 0 re-initialises the global data
   1.436 +  // structures. Then, they enter the second sync barrier. This
    1.437 +  // ensures that no task starts doing work before all data
   1.438 +  // structures (local and global) have been re-initialised. When they
   1.439 +  // exit it, they are free to start working again.
   1.440 +  WorkGangBarrierSync     _first_overflow_barrier_sync;
   1.441 +  WorkGangBarrierSync     _second_overflow_barrier_sync;
   1.442 +
   1.443 +  // this is set by any task, when an overflow on the global data
   1.444 +  // structures is detected.
   1.445 +  volatile bool           _has_overflown;
   1.446 +  // true: marking is concurrent, false: we're in remark
   1.447 +  volatile bool           _concurrent;
   1.448 +  // set at the end of a Full GC so that marking aborts
   1.449 +  volatile bool           _has_aborted;
   1.450 +
   1.451 +  // used when remark aborts due to an overflow to indicate that
   1.452 +  // another concurrent marking phase should start
   1.453 +  volatile bool           _restart_for_overflow;
   1.454 +
   1.455 +  // This is true from the very start of concurrent marking until the
   1.456 +  // point when all the tasks complete their work. It is really used
   1.457 +  // to determine the points between the end of concurrent marking and
   1.458 +  // time of remark.
   1.459 +  volatile bool           _concurrent_marking_in_progress;
   1.460 +
   1.461 +  // verbose level
   1.462 +  CMVerboseLevel          _verbose_level;
   1.463 +
   1.464 +  // All of these times are in ms.
   1.465 +  NumberSeq _init_times;
   1.466 +  NumberSeq _remark_times;
   1.467 +  NumberSeq   _remark_mark_times;
   1.468 +  NumberSeq   _remark_weak_ref_times;
   1.469 +  NumberSeq _cleanup_times;
   1.470 +  double    _total_counting_time;
   1.471 +  double    _total_rs_scrub_time;
   1.472 +
   1.473 +  double*   _accum_task_vtime;   // accumulated task vtime
   1.474 +
   1.475 +  FlexibleWorkGang* _parallel_workers;
   1.476 +
   1.477 +  ForceOverflowSettings _force_overflow_conc;
   1.478 +  ForceOverflowSettings _force_overflow_stw;
   1.479 +
   1.480 +  void weakRefsWork(bool clear_all_soft_refs);
   1.481 +
   1.482 +  void swapMarkBitMaps();
   1.483 +
   1.484 +  // It resets the global marking data structures, as well as the
   1.485 +  // task local ones; should be called during initial mark.
   1.486 +  void reset();
   1.487 +
   1.488 +  // Resets all the marking data structures. Called when we have to restart
   1.489 +  // marking or when marking completes (via set_non_marking_state below).
   1.490 +  void reset_marking_state(bool clear_overflow = true);
   1.491 +
   1.492 +  // We do this after we're done with marking so that the marking data
   1.493 +  // structures are initialised to a sensible and predictable state.
   1.494 +  void set_non_marking_state();
   1.495 +
   1.496 +  // Called to indicate how many threads are currently active.
   1.497 +  void set_concurrency(uint active_tasks);
   1.498 +
   1.499 +  // It should be called to indicate which phase we're in (concurrent
   1.500 +  // mark or remark) and how many threads are currently active.
   1.501 +  void set_concurrency_and_phase(uint active_tasks, bool concurrent);
   1.502 +
   1.503 +  // prints all gathered CM-related statistics
   1.504 +  void print_stats();
   1.505 +
   1.506 +  bool cleanup_list_is_empty() {
   1.507 +    return _cleanup_list.is_empty();
   1.508 +  }
   1.509 +
   1.510 +  // accessor methods
   1.511 +  uint parallel_marking_threads() const     { return _parallel_marking_threads; }
   1.512 +  uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
   1.513 +  double sleep_factor()                     { return _sleep_factor; }
   1.514 +  double marking_task_overhead()            { return _marking_task_overhead;}
   1.515 +  double cleanup_sleep_factor()             { return _cleanup_sleep_factor; }
   1.516 +  double cleanup_task_overhead()            { return _cleanup_task_overhead;}
   1.517 +
   1.518 +  bool use_parallel_marking_threads() const {
   1.519 +    assert(parallel_marking_threads() <=
   1.520 +           max_parallel_marking_threads(), "sanity");
   1.521 +    assert((_parallel_workers == NULL && parallel_marking_threads() == 0) ||
   1.522 +           parallel_marking_threads() > 0,
   1.523 +           "parallel workers not set up correctly");
   1.524 +    return _parallel_workers != NULL;
   1.525 +  }
   1.526 +
   1.527 +  HeapWord*               finger()          { return _finger;   }
   1.528 +  bool                    concurrent()      { return _concurrent; }
   1.529 +  uint                    active_tasks()    { return _active_tasks; }
   1.530 +  ParallelTaskTerminator* terminator()      { return &_terminator; }
   1.531 +
   1.532 +  // It claims the next available region to be scanned by a marking
   1.533 +  // task/thread. It might return NULL if the next region is empty or
   1.534 +  // we have run out of regions. In the latter case, out_of_regions()
   1.535 +  // determines whether we've really run out of regions or the task
   1.536 +  // should call claim_region() again. This might seem a bit
   1.537 +  // awkward. Originally, the code was written so that claim_region()
   1.538 +  // either successfully returned with a non-empty region or there
   1.539 +  // were no more regions to be claimed. The problem with this was
   1.540 +  // that, in certain circumstances, it iterated over large chunks of
   1.541 +  // the heap finding only empty regions and, while it was working, it
   1.542 +  // was preventing the calling task to call its regular clock
   1.543 +  // method. So, this way, each task will spend very little time in
   1.544 +  // claim_region() and is allowed to call the regular clock method
   1.545 +  // frequently.
   1.546 +  HeapRegion* claim_region(uint worker_id);
   1.547 +
   1.548 +  // It determines whether we've run out of regions to scan. Note that
   1.549 +  // the finger can point past the heap end in case the heap was expanded
   1.550 +  // to satisfy an allocation without doing a GC. This is fine, because all
   1.551 +  // objects in those regions will be considered live anyway because of
   1.552 +  // SATB guarantees (i.e. their TAMS will be equal to bottom).
   1.553 +  bool        out_of_regions() { return _finger >= _heap_end; }
   1.554 +
   1.555 +  // Returns the task with the given id
   1.556 +  CMTask* task(int id) {
   1.557 +    assert(0 <= id && id < (int) _active_tasks,
   1.558 +           "task id not within active bounds");
   1.559 +    return _tasks[id];
   1.560 +  }
   1.561 +
   1.562 +  // Returns the task queue with the given id
   1.563 +  CMTaskQueue* task_queue(int id) {
   1.564 +    assert(0 <= id && id < (int) _active_tasks,
   1.565 +           "task queue id not within active bounds");
   1.566 +    return (CMTaskQueue*) _task_queues->queue(id);
   1.567 +  }
   1.568 +
   1.569 +  // Returns the task queue set
   1.570 +  CMTaskQueueSet* task_queues()  { return _task_queues; }
   1.571 +
   1.572 +  // Access / manipulation of the overflow flag which is set to
   1.573 +  // indicate that the global stack has overflown
   1.574 +  bool has_overflown()           { return _has_overflown; }
   1.575 +  void set_has_overflown()       { _has_overflown = true; }
   1.576 +  void clear_has_overflown()     { _has_overflown = false; }
   1.577 +  bool restart_for_overflow()    { return _restart_for_overflow; }
   1.578 +
   1.579 +  // Methods to enter the two overflow sync barriers
   1.580 +  void enter_first_sync_barrier(uint worker_id);
   1.581 +  void enter_second_sync_barrier(uint worker_id);
   1.582 +
   1.583 +  ForceOverflowSettings* force_overflow_conc() {
   1.584 +    return &_force_overflow_conc;
   1.585 +  }
   1.586 +
   1.587 +  ForceOverflowSettings* force_overflow_stw() {
   1.588 +    return &_force_overflow_stw;
   1.589 +  }
   1.590 +
   1.591 +  ForceOverflowSettings* force_overflow() {
   1.592 +    if (concurrent()) {
   1.593 +      return force_overflow_conc();
   1.594 +    } else {
   1.595 +      return force_overflow_stw();
   1.596 +    }
   1.597 +  }
   1.598 +
   1.599 +  // Live Data Counting data structures...
   1.600 +  // These data structures are initialized at the start of
   1.601 +  // marking. They are written to while marking is active.
   1.602 +  // They are aggregated during remark; the aggregated values
   1.603 +  // are then used to populate the _region_bm, _card_bm, and
   1.604 +  // the total live bytes, which are then subsequently updated
   1.605 +  // during cleanup.
   1.606 +
   1.607 +  // An array of bitmaps (one bit map per task). Each bitmap
   1.608 +  // is used to record the cards spanned by the live objects
   1.609 +  // marked by that task/worker.
   1.610 +  BitMap*  _count_card_bitmaps;
   1.611 +
   1.612 +  // Used to record the number of marked live bytes
   1.613 +  // (for each region, by worker thread).
   1.614 +  size_t** _count_marked_bytes;
   1.615 +
   1.616 +  // Card index of the bottom of the G1 heap. Used for biasing indices into
   1.617 +  // the card bitmaps.
   1.618 +  intptr_t _heap_bottom_card_num;
   1.619 +
   1.620 +  // Set to true when initialization is complete
   1.621 +  bool _completed_initialization;
   1.622 +
   1.623 +public:
   1.624 +  // Manipulation of the global mark stack.
   1.625 +  // Notice that the first mark_stack_push is CAS-based, whereas the
   1.626 +  // two below are Mutex-based. This is OK since the first one is only
   1.627 +  // called during evacuation pauses and doesn't compete with the
   1.628 +  // other two (which are called by the marking tasks during
   1.629 +  // concurrent marking or remark).
   1.630 +  bool mark_stack_push(oop p) {
   1.631 +    _markStack.par_push(p);
   1.632 +    if (_markStack.overflow()) {
   1.633 +      set_has_overflown();
   1.634 +      return false;
   1.635 +    }
   1.636 +    return true;
   1.637 +  }
   1.638 +  bool mark_stack_push(oop* arr, int n) {
   1.639 +    _markStack.par_push_arr(arr, n);
   1.640 +    if (_markStack.overflow()) {
   1.641 +      set_has_overflown();
   1.642 +      return false;
   1.643 +    }
   1.644 +    return true;
   1.645 +  }
   1.646 +  void mark_stack_pop(oop* arr, int max, int* n) {
   1.647 +    _markStack.par_pop_arr(arr, max, n);
   1.648 +  }
   1.649 +  size_t mark_stack_size()                { return _markStack.size(); }
   1.650 +  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
   1.651 +  bool mark_stack_overflow()              { return _markStack.overflow(); }
   1.652 +  bool mark_stack_empty()                 { return _markStack.isEmpty(); }
   1.653 +
  CMRootRegions* root_regions() { return &_root_regions; }

  // True while we are in the concurrent phase of a marking cycle.
  bool concurrent_marking_in_progress() {
    return _concurrent_marking_in_progress;
  }
  void set_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = true;
  }
  void clear_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = false;
  }

  // Add the given virtual time to the accumulated vtime of task i.
  void update_accum_task_vtime(int i, double vtime) {
    _accum_task_vtime[i] += vtime;
  }
   1.669 +
   1.670 +  double all_task_accum_vtime() {
   1.671 +    double ret = 0.0;
   1.672 +    for (uint i = 0; i < _max_worker_id; ++i)
   1.673 +      ret += _accum_task_vtime[i];
   1.674 +    return ret;
   1.675 +  }
   1.676 +
  // Attempts to steal an object from the task queues of other tasks.
  // Presumably returns true and stores the stolen entry in obj on
  // success (standard GenericTaskQueueSet::steal() contract).
  bool try_stealing(uint worker_id, int* hash_seed, oop& obj) {
    return _task_queues->steal(worker_id, hash_seed, obj);
  }
   1.681 +
  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
  ~ConcurrentMark();

  ConcurrentMarkThread* cmThread() { return _cmThread; }

  // Accessors for the two marking bitmaps; the prev bitmap is exposed
  // read-only.
  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }

  // Returns the number of GC threads to be used in a concurrent
  // phase based on the number of GC threads being used in a STW
  // phase.
  uint scale_parallel_threads(uint n_par_threads);

  // Calculates the number of GC threads to be used in a concurrent phase.
  uint calc_parallel_marking_threads();

  // The following three are interaction between CM and
  // G1CollectedHeap

  // This notifies CM that a root during initial-mark needs to be
  // grayed. It is MT-safe. word_size is the size of the object in
  // words. It is passed explicitly as sometimes we cannot calculate
  // it from the given object because it might be in an inconsistent
  // state (e.g., in to-space and being copied). So the caller is
  // responsible for dealing with this issue (e.g., get the size from
  // the from-space image when the to-space image might be
  // inconsistent) and always passing the size. hr is the region that
  // contains the object and it's passed optionally from callers who
  // might already have it (no point in recalculating it).
  inline void grayRoot(oop obj, size_t word_size,
                       uint worker_id, HeapRegion* hr = NULL);

  // It iterates over the heap and for each object it comes across it
  // will dump the contents of its reference fields, as well as
  // liveness information for the object and its referents. The dump
  // will be written to a file with the following name:
  // G1PrintReachableBaseFile + "." + str.
  // vo decides whether the prev (vo == UsePrevMarking), the next
  // (vo == UseNextMarking) marking information, or the mark word
  // (vo == UseMarkWord) will be used to determine the liveness of
  // each object / referent.
  // If all is true, all objects in the heap will be dumped, otherwise
  // only the live ones. In the dump the following symbols / abbreviations
  // are used:
  //   M : an explicitly live object (its bitmap bit is set)
  //   > : an implicitly live object (over tams)
  //   O : an object outside the G1 heap (typically: in the perm gen)
  //   NOT : a reference field whose referent is not live
  //   AND MARKED : indicates that an object is both explicitly and
  //   implicitly live (it should be one or the other, not both)
  void print_reachable(const char* str,
                       VerifyOption vo, bool all) PRODUCT_RETURN;

  // Clear the next marking bitmap (will be called concurrently).
  void clearNextBitmap();

  // These two do the work that needs to be done before and after the
  // initial root checkpoint. Since this checkpoint can be done at two
  // different points (i.e. an explicit pause or piggy-backed on a
  // young collection), then it's nice to be able to easily share the
  // pre/post code. It might be the case that we can put everything in
  // the post method. TP
  void checkpointRootsInitialPre();
  void checkpointRootsInitialPost();

  // Scan all the root regions and mark everything reachable from
  // them.
  void scanRootRegions();

  // Scan a single root region and mark everything reachable from it.
  void scanRootRegion(HeapRegion* hr, uint worker_id);

  // Do concurrent phase of marking, to a tentative transitive closure.
  void markFromRoots();

  // Remark / cleanup pause entry points.
  void checkpointRootsFinal(bool clear_all_soft_refs);
  void checkpointRootsFinalWork();
  void cleanup();
  void completeCleanup();

  // Mark in the previous bitmap.  NB: this is usually read-only, so use
  // this carefully!
  inline void markPrev(oop p);

  // Clears marks for all objects in the given range, for the prev,
  // next, or both bitmaps.  NB: the previous bitmap is usually
  // read-only, so use this carefully!
  void clearRangePrevBitmap(MemRegion mr);
  void clearRangeNextBitmap(MemRegion mr);
  void clearRangeBothBitmaps(MemRegion mr);

  // Notify data structures that a GC has started.
  void note_start_of_gc() {
    _markStack.note_start_of_gc();
  }

  // Notify data structures that a GC is finished.
  void note_end_of_gc() {
    _markStack.note_end_of_gc();
  }

  // Verify that there are no CSet oops on the stacks (taskqueues /
  // global mark stack), enqueued SATB buffers, per-thread SATB
  // buffers, and fingers (global / per-task). The boolean parameters
  // decide which of the above data structures to verify. If marking
  // is not in progress, it's a no-op.
  void verify_no_cset_oops(bool verify_stacks,
                           bool verify_enqueued_buffers,
                           bool verify_thread_buffers,
                           bool verify_fingers) PRODUCT_RETURN;

  // It is called at the end of an evacuation pause during marking so
  // that CM is notified of where the new end of the heap is. It
  // doesn't do anything if concurrent_marking_in_progress() is false,
  // unless the force parameter is true.
  void update_g1_committed(bool force = false);
   1.798 +
   1.799 +  bool isMarked(oop p) const {
   1.800 +    assert(p != NULL && p->is_oop(), "expected an oop");
   1.801 +    HeapWord* addr = (HeapWord*)p;
   1.802 +    assert(addr >= _nextMarkBitMap->startWord() ||
   1.803 +           addr < _nextMarkBitMap->endWord(), "in a region");
   1.804 +
   1.805 +    return _nextMarkBitMap->isMarked(addr);
   1.806 +  }
   1.807 +
  // Returns whether the given object has not yet been marked.
  inline bool not_yet_marked(oop p) const;

  // XXX Debug code: card-table queries used only for verification.
  bool containing_card_is_marked(void* p);
  bool containing_cards_are_marked(void* start, void* last);
   1.813 +
   1.814 +  bool isPrevMarked(oop p) const {
   1.815 +    assert(p != NULL && p->is_oop(), "expected an oop");
   1.816 +    HeapWord* addr = (HeapWord*)p;
   1.817 +    assert(addr >= _prevMarkBitMap->startWord() ||
   1.818 +           addr < _prevMarkBitMap->endWord(), "in a region");
   1.819 +
   1.820 +    return _prevMarkBitMap->isMarked(addr);
   1.821 +  }
   1.822 +
  // Yield support for the concurrent phase.
  inline bool do_yield_check(uint worker_i = 0);
  inline bool should_yield();

  // Called to abort the marking cycle after a Full GC takes place.
  void abort();

  bool has_aborted()      { return _has_aborted; }

  // This prints the global/local fingers. It is used for debugging.
  NOT_PRODUCT(void print_finger();)

  void print_summary_info();

  void print_worker_threads_on(outputStream* st) const;

  void print_on_error(outputStream* st) const;

  // The following indicate whether a given verbose level has been
  // set. Notice that anything above stats is conditional to
  // _MARKING_VERBOSE_ having been set to 1
  bool verbose_stats() {
    return _verbose_level >= stats_verbose;
  }
  bool verbose_low() {
    return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
  }
  bool verbose_medium() {
    return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
  }
  bool verbose_high() {
    return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
  }
   1.855 +
  // Liveness counting

  // Utility routine to set an exclusive range of cards on the given
  // card liveness bitmap
  inline void set_card_bitmap_range(BitMap* card_bm,
                                    BitMap::idx_t start_idx,
                                    BitMap::idx_t end_idx,
                                    bool is_par);

  // Returns the card number of the bottom of the G1 heap.
  // Used in biasing indices into accounting card bitmaps.
  intptr_t heap_bottom_card_num() const {
    return _heap_bottom_card_num;
  }

  // Returns the card bitmap for a given task or worker id.
  BitMap* count_card_bitmap_for(uint worker_id) {
    assert(0 <= worker_id && worker_id < _max_worker_id, "oob");
    assert(_count_card_bitmaps != NULL, "uninitialized");
    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    return task_card_bm;
  }

  // Returns the array containing the marked bytes for each region,
  // for the given worker or task id.
  size_t* count_marked_bytes_array_for(uint worker_id) {
    assert(0 <= worker_id && worker_id < _max_worker_id, "oob");
    assert(_count_marked_bytes != NULL, "uninitialized");
    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
    assert(marked_bytes_array != NULL, "uninitialized");
    return marked_bytes_array;
  }

  // Returns the index in the liveness accounting card table bitmap
  // for the given address
  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);

  // Counts the size of the given memory region in the given
  // marked_bytes array slot for the given HeapRegion.
  // Sets the bits in the given card bitmap that are associated with the
  // cards that are spanned by the memory region.
  inline void count_region(MemRegion mr, HeapRegion* hr,
                           size_t* marked_bytes_array,
                           BitMap* task_card_bm);

  // Counts the given memory region in the task/worker counting
  // data structures for the given worker id.
  inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);

  // Counts the given memory region in the task/worker counting
  // data structures for the given worker id, looking up the
  // containing heap region itself.
  inline void count_region(MemRegion mr, uint worker_id);

  // Counts the given object in the given task/worker counting
  // data structures.
  inline void count_object(oop obj, HeapRegion* hr,
                           size_t* marked_bytes_array,
                           BitMap* task_card_bm);

  // Counts the given object in the task/worker counting data
  // structures for the given worker id.
  inline void count_object(oop obj, HeapRegion* hr, uint worker_id);

  // Attempts to mark the given object and, if successful, counts
  // the object in the given task/worker counting structures.
  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
                                 size_t* marked_bytes_array,
                                 BitMap* task_card_bm);

  // Attempts to mark the given object and, if successful, counts
  // the object in the task/worker counting structures for the
  // given worker id.
  inline bool par_mark_and_count(oop obj, size_t word_size,
                                 HeapRegion* hr, uint worker_id);

  // Attempts to mark the given object and, if successful, counts
  // the object in the task/worker counting structures for the
  // given worker id.
  inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);

  // Similar to the above routine but we don't know the heap region that
  // contains the object to be marked/counted, which this routine looks up.
  inline bool par_mark_and_count(oop obj, uint worker_id);

  // Similar to the above routine but there are times when we cannot
  // safely calculate the size of obj due to races and we, therefore,
  // pass the size in as a parameter. It is the caller's responsibility
  // to ensure that the size passed in for obj is valid.
  inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);

  // Unconditionally mark the given object, and unconditionally count
  // the object in the counting structures for worker id 0.
  // Should *not* be called from parallel code.
  inline bool mark_and_count(oop obj, HeapRegion* hr);

  // Similar to the above routine but we don't know the heap region that
  // contains the object to be marked/counted, which this routine looks up.
  // Should *not* be called from parallel code.
  inline bool mark_and_count(oop obj);

  // Returns true if initialization was successfully completed.
  bool completed_initialization() const {
    return _completed_initialization;
  }
   1.961 +
protected:
  // Clear all the per-task bitmaps and arrays used to store the
  // counting data.
  void clear_all_count_data();

  // Aggregates the counting data for each worker/task
  // that was constructed while marking. Also sets
  // the amount of marked bytes for each region and
  // the top at concurrent mark count.
  void aggregate_count_data();

  // Verification routine for the aggregated counting data.
  void verify_count_data();
};
   1.976 +
// A class representing a marking task.
class CMTask : public TerminatorTerminator {
private:
  enum PrivateConstants {
    // the regular clock call is called once the scanned words reaches
    // this limit
    words_scanned_period          = 12*1024,
    // the regular clock call is called once the number of visited
    // references reaches this limit
    refs_reached_period           = 384,
    // initial value for the hash seed, used in the work stealing code
    init_hash_seed                = 17,
    // how many entries will be transferred between global stack and
    // local queues
    global_stack_transfer_size    = 16
  };

  uint                        _worker_id;
  G1CollectedHeap*            _g1h;
  ConcurrentMark*             _cm;
  CMBitMap*                   _nextMarkBitMap;
  // the task queue of this task
  CMTaskQueue*                _task_queue;
private:
  // the task queue set---needed for stealing
  CMTaskQueueSet*             _task_queues;
  // indicates whether the task has been claimed---this is only for
  // debugging purposes
  bool                        _claimed;

  // number of calls to this task
  int                         _calls;

  // when the virtual timer reaches this time, the marking step should
  // exit
  double                      _time_target_ms;
  // the start time of the current marking step
  double                      _start_time_ms;

  // the oop closure used for iterations over oops
  G1CMOopClosure*             _cm_oop_closure;

  // the region this task is scanning, NULL if we're not scanning any
  HeapRegion*                 _curr_region;
  // the local finger of this task, NULL if we're not scanning a region
  HeapWord*                   _finger;
  // limit of the region this task is scanning, NULL if we're not scanning one
  HeapWord*                   _region_limit;

  // the number of words this task has scanned
  size_t                      _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _words_scanned_limit;
  // the initial value of _words_scanned_limit (i.e. what it was
  // before it was decreased).
  size_t                      _real_words_scanned_limit;

  // the number of references this task has visited
  size_t                      _refs_reached;
  // When _refs_reached reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _refs_reached_limit;
  // the initial value of _refs_reached_limit (i.e. what it was before
  // it was decreased).
  size_t                      _real_refs_reached_limit;

  // used by the work stealing stuff
  int                         _hash_seed;
  // if this is true, then the task has aborted for some reason
  bool                        _has_aborted;
  // set when the task aborts because it has met its time quota
  bool                        _has_timed_out;
  // true when we're draining SATB buffers; this avoids the task
  // aborting due to SATB buffers being available (as we're already
  // dealing with them)
  bool                        _draining_satb_buffers;

  // number sequence of past step times
  NumberSeq                   _step_times_ms;
  // elapsed time of this task
  double                      _elapsed_time_ms;
  // termination time of this task
  double                      _termination_time_ms;
  // when this task got into the termination protocol
  double                      _termination_start_time_ms;

  // true when the task is during a concurrent phase, false when it is
  // in the remark phase (so, in the latter case, we do not have to
  // check all the things that we have to check during the concurrent
  // phase, i.e. SATB buffer availability...)
  bool                        _concurrent;

  TruncatedSeq                _marking_step_diffs_ms;

  // Counting data structures. Embedding the task's marked_bytes_array
  // and card bitmap into the actual task saves having to go through
  // the ConcurrentMark object.
  size_t*                     _marked_bytes_array;
  BitMap*                     _card_bm;

  // LOTS of statistics related with this task
#if _MARKING_STATS_
  NumberSeq                   _all_clock_intervals_ms;
  double                      _interval_start_time_ms;

  int                         _aborted;
  int                         _aborted_overflow;
  int                         _aborted_cm_aborted;
  int                         _aborted_yield;
  int                         _aborted_timed_out;
  int                         _aborted_satb;
  int                         _aborted_termination;

  int                         _steal_attempts;
  int                         _steals;

  int                         _clock_due_to_marking;
  int                         _clock_due_to_scanning;

  int                         _local_pushes;
  int                         _local_pops;
  int                         _local_max_size;
  int                         _objs_scanned;

  int                         _global_pushes;
  int                         _global_pops;
  int                         _global_max_size;

  int                         _global_transfers_to;
  int                         _global_transfers_from;

  int                         _regions_claimed;
  int                         _objs_found_on_bitmap;

  int                         _satb_buffers_processed;
#endif // _MARKING_STATS_

  // it updates the local fields after this task has claimed
  // a new region to scan
  void setup_for_region(HeapRegion* hr);
  // it brings up-to-date the limit of the region
  void update_region_limit();

  // called when either the words scanned or the refs visited limit
  // has been reached
  void reached_limit();
  // recalculates the words scanned and refs visited limits
  void recalculate_limits();
  // decreases the words scanned and refs visited limits when we reach
  // an expensive operation
  void decrease_limits();
  // it checks whether the words scanned or refs visited reached their
  // respective limit and calls reached_limit() if they have
  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit) {
      reached_limit();
    }
  }
  // this is supposed to be called regularly during a marking step as
  // it checks a bunch of conditions that might cause the marking step
  // to abort
  void regular_clock_call();
  bool concurrent() { return _concurrent; }

public:
  // It resets the task; it should be called right at the beginning of
  // a marking phase.
  void reset(CMBitMap* _nextMarkBitMap);
  // it clears all the fields that correspond to a claimed region.
  void clear_region_fields();

  void set_concurrent(bool concurrent) { _concurrent = concurrent; }

  // The main method of this class which performs a marking step
  // trying not to exceed the given duration. However, it might exit
  // prematurely, according to some conditions (i.e. SATB buffers are
  // available for processing).
  void do_marking_step(double target_ms,
                       bool do_termination,
                       bool is_serial);

  // These two calls start and stop the timer
  void record_start_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0;
  }
  void record_end_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
  }

  // returns the worker ID associated with this task.
  uint worker_id() { return _worker_id; }

  // From TerminatorTerminator. It determines whether this task should
  // exit the termination protocol after it's entered it.
  virtual bool should_exit_termination();

  // Resets the local region fields after a task has finished scanning a
  // region; or when they have become stale as a result of the region
  // being evacuated.
  void giveup_current_region();

  HeapWord* finger()            { return _finger; }

  bool has_aborted()            { return _has_aborted; }
  void set_has_aborted()        { _has_aborted = true; }
  void clear_has_aborted()      { _has_aborted = false; }
  bool has_timed_out()          { return _has_timed_out; }
  bool claimed()                { return _claimed; }

  void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);

  // It grays the object by marking it and, if necessary, pushing it
  // on the local queue
  inline void deal_with_reference(oop obj);

  // It scans an object and visits its children.
  void scan_object(oop obj);

  // It pushes an object on the local queue.
  inline void push(oop obj);

  // These two move entries to/from the global stack.
  void move_entries_to_global_stack();
  void get_entries_from_global_stack();

  // It pops and scans objects from the local queue. If partially is
  // true, then it stops when the queue size is of a given limit. If
  // partially is false, then it stops when the queue is empty.
  void drain_local_queue(bool partially);
  // It moves entries from the global stack to the local queue and
  // drains the local queue. If partially is true, then it stops when
  // both the global stack and the local queue reach a given size. If
  // partially is false, it tries to empty them totally.
  void drain_global_stack(bool partially);
  // It keeps picking SATB buffers and processing them until no SATB
  // buffers are available.
  void drain_satb_buffers();

  // moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
    _finger = new_finger;
  }

  CMTask(uint worker_id, ConcurrentMark *cm,
         size_t* marked_bytes, BitMap* card_bm,
         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);

  // it prints statistics associated with this task
  void print_stats();

#if _MARKING_STATS_
  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
#endif // _MARKING_STATS_
};
  1.1238 +
// Class that's used to print out per-region liveness
// information. It's currently used at the end of marking and also
// after we sort the old regions at the end of the cleanup operation.
class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
private:
  outputStream* _out;

  // Accumulators for these values.
  size_t _total_used_bytes;
  size_t _total_capacity_bytes;
  size_t _total_prev_live_bytes;
  size_t _total_next_live_bytes;

  // These are set up when we come across a "starts humongous" region
  // (as this is where most of this information is stored, not in the
  // subsequent "continues humongous" regions). After that, for every
  // region in a given humongous region series we deduce the right
  // values for it by simply subtracting the appropriate amount from
  // these fields. All these values should reach 0 after we've visited
  // the last region in the series.
  size_t _hum_used_bytes;
  size_t _hum_capacity_bytes;
  size_t _hum_prev_live_bytes;
  size_t _hum_next_live_bytes;

  // Accumulator for the remembered set size
  size_t _total_remset_bytes;

  // Accumulator for strong code roots memory size
  size_t _total_strong_code_roots_bytes;

  // Percentage of val over total; 0.0 when total is zero so we never
  // divide by zero.
  static double perc(size_t val, size_t total) {
    if (total == 0) {
      return 0.0;
    } else {
      return 100.0 * ((double) val / (double) total);
    }
  }

  static double bytes_to_mb(size_t val) {
    return (double) val / (double) M;
  }

  // See the .cpp file.
  size_t get_hum_bytes(size_t* hum_bytes);
  void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
                     size_t* prev_live_bytes, size_t* next_live_bytes);

public:
  // The header and footer are printed in the constructor and
  // destructor respectively.
  G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
  virtual bool doHeapRegion(HeapRegion* r);
  ~G1PrintRegionLivenessInfoClosure();
};
  1.1294 +
  1.1295 +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP

mercurial