src/share/vm/gc_implementation/g1/concurrentMark.hpp

author       johnc
date         Mon, 03 Aug 2009 12:59:30 -0700
changeset    1324:15c5903cf9e1
parent       1280:df6caf649ff7
child        1371:e1fdf4fd34dc
permissions  -rw-r--r--

6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
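
The claiming scheme the summary describes can be sketched in HotSpot-style C++ as follows. This is an illustrative reconstruction rather than the patch itself: the names _hot_cache, _hot_cache_size, _hot_cache_par_chunk_size, _hot_cache_par_claimed_idx and process_card() are assumptions made for the example, while Atomic::add and MIN2 are existing HotSpot primitives.

  // Each GC worker thread claims the next chunk of the hot card cache
  // with an atomic fetch-and-add and processes the cards in it,
  // repeating until every chunk has been claimed.
  void drain_hot_cache_in_parallel() {
    while (true) {
      // Atomic::add returns the updated value; subtracting the chunk
      // size recovers the start index this thread has just claimed.
      jint start = Atomic::add(_hot_cache_par_chunk_size,
                               &_hot_cache_par_claimed_idx)
                   - _hot_cache_par_chunk_size;
      if (start >= _hot_cache_size) {
        return; // all chunks claimed; this thread is done
      }
      jint end = MIN2(start + _hot_cache_par_chunk_size, _hot_cache_size);
      for (jint i = start; i < end; i++) {
        jbyte* card_ptr = _hot_cache[i];
        if (card_ptr != NULL) {
          process_card(card_ptr); // hypothetical per-card cleanup hook
        }
      }
    }
  }

The chunk size is fixed at VM initialization (cache size divided across the worker threads), so the only coordination the workers need at cleanup time is the single atomic claim index.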

/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */
class G1CollectedHeap;
class CMTask;
typedef GenericTaskQueue<oop> CMTaskQueue;
typedef GenericTaskQueueSet<oop> CMTaskQueueSet;

// A generic CM bit map.  This is essentially a wrapper around the BitMap
// class, with one bit per (1<<_shifter) HeapWords.

class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
 protected:
  HeapWord* _bmStartWord;      // base address of range covered by map
  size_t    _bmWordSize;       // map size (in #HeapWords covered)
  const int _shifter;          // map to char or bit
  VirtualSpace _virtual_space; // underlying the bit map
  BitMap    _bm;               // the bit map itself

 public:
  // constructor
  CMBitMapRO(ReservedSpace rs, int shifter);

  enum { do_yield = true };

  // inquiries
  HeapWord* startWord()   const { return _bmStartWord; }
  size_t    sizeInWords() const { return _bmWordSize;  }
  // the following is one past the last word in space
  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }

  // read marks
  bool isMarked(HeapWord* addr) const {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.at(heapWordToOffset(addr));
  }

  // iteration
  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
  bool iterate(BitMapClosure* cl, MemRegion mr);

  // Return the address corresponding to the next marked bit at or after
  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
                                     HeapWord* limit = NULL) const;
  // Return the address corresponding to the next unmarked bit at or after
  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
                                       HeapWord* limit = NULL) const;

  // conversion utilities
  // XXX Fix these so that offsets are size_t's...
  HeapWord* offsetToHeapWord(size_t offset) const {
    return _bmStartWord + (offset << _shifter);
  }
  size_t heapWordToOffset(HeapWord* addr) const {
    return pointer_delta(addr, _bmStartWord) >> _shifter;
  }
  int heapWordDiffToOffsetDiff(size_t diff) const;
  HeapWord* nextWord(HeapWord* addr) {
    return offsetToHeapWord(heapWordToOffset(addr) + 1);
  }

  void mostly_disjoint_range_union(BitMap*   from_bitmap,
                                   size_t    from_start_index,
                                   HeapWord* to_start_word,
                                   size_t    word_num);

  // debugging
  NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
};
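
// Usage sketch (illustrative, not part of the original interface): with
// a shifter of 0 each bit covers a single HeapWord, so the two
// conversions above are inverse shifts of one another:
//
//   CMBitMapRO bm(rs, 0 /* shifter */);
//   size_t    off = bm.heapWordToOffset(addr); // (addr - start) >> 0
//   HeapWord* a   = bm.offsetToHeapWord(off);  // start + (off << 0) == addr
//
// A shifter of k makes one bit stand for 2^k consecutive HeapWords,
// shrinking the bitmap at the cost of resolution.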
class CMBitMap : public CMBitMapRO {

 public:
  // constructor
  CMBitMap(ReservedSpace rs, int shifter) :
    CMBitMapRO(rs, shifter) {}

  // write marks
  void mark(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    _bm.at_put(heapWordToOffset(addr), true);
  }
  void clear(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    _bm.at_put(heapWordToOffset(addr), false);
  }
  bool parMark(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.par_at_put(heapWordToOffset(addr), true);
  }
  bool parClear(HeapWord* addr) {
    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
           "outside underlying space?");
    return _bm.par_at_put(heapWordToOffset(addr), false);
  }
  void markRange(MemRegion mr);
  void clearAll();
  void clearRange(MemRegion mr);

  // Starting at the bit corresponding to "addr" (inclusive), find the next
  // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
  // the end of this run (stopping at "end_addr").  Return the MemRegion
  // covering from the start of the region corresponding to the first bit
  // of the run to the end of the region corresponding to the last bit of
  // the run.  If there is no "1" bit at or after "addr", return an empty
  // MemRegion.
  MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
};
// Represents a marking stack used by the CM collector.
// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
class CMMarkStack VALUE_OBJ_CLASS_SPEC {
  ConcurrentMark* _cm;
  oop*   _base;      // bottom of stack
  jint   _index;     // one more than last occupied index
  jint   _capacity;  // max #elements
  jint   _oops_do_bound;  // Number of elements to include in next iteration.
  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run

  bool   _overflow;
  DEBUG_ONLY(bool _drain_in_progress;)
  DEBUG_ONLY(bool _drain_in_progress_yields;)

 public:
  CMMarkStack(ConcurrentMark* cm);
  ~CMMarkStack();

  void allocate(size_t size);

  oop pop() {
    if (!isEmpty()) {
      return _base[--_index];
    }
    return NULL;
  }

  // If overflow happens, don't do the push, and record the overflow.
  // *Requires* that "ptr" is already marked.
  void push(oop ptr) {
    if (isFull()) {
      // Record overflow.
      _overflow = true;
      return;
    } else {
      _base[_index++] = ptr;
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
    }
  }
  // Non-block impl.  Note: concurrency is allowed only with other
  // "par_push" operations, not with "pop" or "drain".  We would need
  // parallel versions of them if such concurrency was desired.
  void par_push(oop ptr);

  // Pushes the first "n" elements of "ptr_arr" on the stack.
  // Non-block impl.  Note: concurrency is allowed only with other
  // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
  void par_adjoin_arr(oop* ptr_arr, int n);

  // Pushes the first "n" elements of "ptr_arr" on the stack.
  // Locking impl: concurrency is allowed only with
  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
  // locking strategy.
  void par_push_arr(oop* ptr_arr, int n);

  // If returns false, the array was empty.  Otherwise, removes up to "max"
  // elements from the stack, and transfers them to "ptr_arr" in an
  // unspecified order.  The actual number transferred is given in "n" ("n
  // == 0" is deliberately redundant with the return value.)  Locking impl:
  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
  // operations, which use the same locking strategy.
  bool par_pop_arr(oop* ptr_arr, int max, int* n);

  // Drain the mark stack, applying the given closure to all fields of
  // objects on the stack.  (That is, continue until the stack is empty,
  // even if closure applications add entries to the stack.)  The "bm"
  // argument, if non-null, may be used to verify that only marked objects
  // are on the mark stack.  If "yield_after" is "true", then the
  // concurrent marker performing the drain offers to yield after
  // processing each object.  If a yield occurs, stops the drain operation
  // and returns false.  Otherwise, returns true.
  template<class OopClosureClass>
  bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);

  bool isEmpty()    { return _index == 0; }
  bool isFull()     { return _index == _capacity; }
  int maxElems()    { return _capacity; }

  bool overflow() { return _overflow; }
  void clear_overflow() { _overflow = false; }

  int  size() { return _index; }

  void setEmpty()   { _index = 0; clear_overflow(); }

  // Record the current size; a subsequent "oops_do" will iterate only over
  // indices valid at the time of this call.
  void set_oops_do_bound(jint bound = -1) {
    if (bound == -1) {
      _oops_do_bound = _index;
    } else {
      _oops_do_bound = bound;
    }
  }
  jint oops_do_bound() { return _oops_do_bound; }
  // iterate over the oops in the mark stack, up to the bound recorded via
  // the call above.
  void oops_do(OopClosure* f);
};
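
// Usage sketch (illustrative; the closure type and names below are made
// up, not part of this interface): a concurrent marker can drain the
// stack with a marking closure and offer to yield between objects:
//
//   CMOopClosure cl;                 // applies marking to each field
//   bool done = _markStack.drain(&cl, _nextMarkBitMap,
//                                true /* yield_after */);
//   if (!done) {
//     // a yield interrupted the drain; it will be resumed later
//   }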
class CMRegionStack VALUE_OBJ_CLASS_SPEC {
  MemRegion* _base;
  jint _capacity;
  jint _index;
  jint _oops_do_bound;
  bool _overflow;
public:
  CMRegionStack();
  ~CMRegionStack();
  void allocate(size_t size);

  // This is lock-free; assumes that it will only be called in parallel
  // with other "push" operations (no pops).
  void push(MemRegion mr);

  // Lock-free; assumes that it will only be called in parallel
  // with other "pop" operations (no pushes).
  MemRegion pop();

  bool isEmpty()    { return _index == 0; }
  bool isFull()     { return _index == _capacity; }

  bool overflow() { return _overflow; }
  void clear_overflow() { _overflow = false; }

  int  size() { return _index; }

  // Iterates over the entries in the region stack and invalidates
  // (i.e. assigns MemRegion() to) the ones that point to regions in
  // the collection set.
  bool invalidate_entries_into_cset();

  // This gives an upper bound on how far the iteration in
  // invalidate_entries_into_cset() will reach. It prevents
  // newly-added entries from being unnecessarily scanned.
  void set_oops_do_bound() {
    _oops_do_bound = _index;
  }

  void setEmpty()   { _index = 0; clear_overflow(); }
};
// this will enable a variety of different statistics per GC task
#define _MARKING_STATS_       0
// this will enable the higher verbose levels
#define _MARKING_VERBOSE_     0

#if _MARKING_STATS_
#define statsOnly(statement)  \
do {                          \
  statement ;                 \
} while (0)
#else // _MARKING_STATS_
#define statsOnly(statement)  \
do {                          \
} while (0)
#endif // _MARKING_STATS_

// Some extra guarantees that I like to also enable in optimised mode
// when debugging. If you want to enable them, comment out the assert
// macro and uncomment the guarantee macro.
// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
#define tmp_guarantee_CM(expr, str) assert(expr, str)

typedef enum {
  no_verbose  = 0,   // verbose turned off
  stats_verbose,     // only prints stats at the end of marking
  low_verbose,       // low verbose, mostly per region and per major event
  medium_verbose,    // a bit more detailed than low
  high_verbose       // per object verbose
} CMVerboseLevel;
class ConcurrentMarkThread;

class ConcurrentMark: public CHeapObj {
  friend class ConcurrentMarkThread;
  friend class CMTask;
  friend class CMBitMapClosure;
  friend class CSMarkOopClosure;
  friend class CMGlobalObjectClosure;
  friend class CMRemarkTask;
  friend class CMConcurrentMarkingTask;
  friend class G1ParNoteEndTask;
  friend class CalcLiveObjectsClosure;

protected:
  ConcurrentMarkThread* _cmThread;   // the thread doing the work
  G1CollectedHeap*      _g1h;        // the heap.
  size_t                _parallel_marking_threads; // the number of marking
                                                   // threads we'll use
  double                _sleep_factor; // how much we have to sleep, with
                                       // respect to the work we just did, to
                                       // meet the marking overhead goal
  double                _marking_task_overhead; // marking target overhead for
                                                // a single task

  // same as the two above, but for the cleanup task
  double                _cleanup_sleep_factor;
  double                _cleanup_task_overhead;

  // Stuff related to age cohort processing.
  struct ParCleanupThreadState {
    char _pre[64];
    UncleanRegionList list;
    char _post[64];
  };
  ParCleanupThreadState** _par_cleanup_thread_state;

  // CMS marking support structures
  CMBitMap                _markBitMap1;
  CMBitMap                _markBitMap2;
  CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
  CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
  bool                    _at_least_one_mark_complete;

  BitMap                  _region_bm;
  BitMap                  _card_bm;

  // Heap bounds
  HeapWord*               _heap_start;
  HeapWord*               _heap_end;

  // For gray objects
  CMMarkStack             _markStack;   // Grey objects behind global finger.
  CMRegionStack           _regionStack; // Grey regions behind global finger.
  HeapWord* volatile      _finger;  // the global finger, region aligned,
                                    // always points to the end of the
                                    // last claimed region

  // marking tasks
  size_t                  _max_task_num; // maximum task number
  size_t                  _active_tasks; // task num currently active
  CMTask**                _tasks;        // task queue array (max_task_num len)
  CMTaskQueueSet*         _task_queues;  // task queue set
  ParallelTaskTerminator  _terminator;   // for termination

  // Two sync barriers that are used to synchronise tasks when an
  // overflow occurs. The algorithm is the following. All tasks enter
  // the first one to ensure that they have all stopped manipulating
  // the global data structures. After they exit it, they re-initialise
  // their data structures and task 0 re-initialises the global data
  // structures. Then, they enter the second sync barrier. This
  // ensures that no task starts doing work before all data
  // structures (local and global) have been re-initialised. When they
  // exit it, they are free to start working again.
  WorkGangBarrierSync     _first_overflow_barrier_sync;
  WorkGangBarrierSync     _second_overflow_barrier_sync;
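
  // Sketch of the protocol above from a single task's point of view
  // (illustrative only; the real sequence lives in the marking step):
  //
  //   _cm->enter_first_sync_barrier(_task_id);  // all tasks quiesce
  //   // each task re-initialises its local structures here; task 0
  //   // also re-initialises the global ones
  //   _cm->enter_second_sync_barrier(_task_id); // no task restarts early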
  // this is set by any task, when an overflow on the global data
  // structures is detected.
  volatile bool           _has_overflown;
  // true: marking is concurrent, false: we're in remark
  volatile bool           _concurrent;
  // set at the end of a Full GC so that marking aborts
  volatile bool           _has_aborted;
  // used when remark aborts due to an overflow to indicate that
  // another concurrent marking phase should start
  volatile bool           _restart_for_overflow;

  // This is true from the very start of concurrent marking until the
  // point when all the tasks complete their work. It is really used
  // to cover the window between the end of concurrent marking and
  // the time of remark.
  volatile bool           _concurrent_marking_in_progress;

  // verbose level
  CMVerboseLevel          _verbose_level;

  COTracker               _cleanup_co_tracker;

  // These two fields are used to implement the optimisation that
  // avoids pushing objects on the global/region stack if there are
  // no collection set regions above the lowest finger.

  // This is the lowest finger (among the global and local fingers),
  // which is calculated before a new collection set is chosen.
  HeapWord* _min_finger;
  // If this flag is true, objects/regions that are marked below the
  // finger should be pushed on the stack(s). If this flag is
  // false, it is safe not to push them on the stack(s).
  bool      _should_gray_objects;

  // All of these times are in ms.
  NumberSeq _init_times;
  NumberSeq _remark_times;
  NumberSeq   _remark_mark_times;
  NumberSeq   _remark_weak_ref_times;
  NumberSeq _cleanup_times;
  double    _total_counting_time;
  double    _total_rs_scrub_time;

  double*   _accum_task_vtime;   // accumulated task vtime

  WorkGang* _parallel_workers;

  void weakRefsWork(bool clear_all_soft_refs);

  void swapMarkBitMaps();

  // It resets the global marking data structures, as well as the
  // task local ones; should be called during initial mark.
  void reset();
  // It resets all the marking data structures.
  void clear_marking_state();

  // It should be called to indicate which phase we're in (concurrent
  // mark or remark) and how many threads are currently active.
  void set_phase(size_t active_tasks, bool concurrent);
  // We do this after we're done with marking so that the marking data
  // structures are initialised to a sensible and predictable state.
  void set_non_marking_state();

  // prints all gathered CM-related statistics
  void print_stats();

  // accessor methods
  size_t parallel_marking_threads() { return _parallel_marking_threads; }
  double sleep_factor()             { return _sleep_factor; }
  double marking_task_overhead()    { return _marking_task_overhead; }
  double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
  double cleanup_task_overhead()    { return _cleanup_task_overhead; }

  HeapWord*               finger()        { return _finger;   }
  bool                    concurrent()    { return _concurrent; }
  size_t                  active_tasks()  { return _active_tasks; }
  ParallelTaskTerminator* terminator()    { return &_terminator; }

  // It claims the next available region to be scanned by a marking
  // task. It might return NULL if the next region is empty or we have
  // run out of regions. In the latter case, out_of_regions()
  // determines whether we've really run out of regions or the task
  // should call claim_region() again.  This might seem a bit
  // awkward. Originally, the code was written so that claim_region()
  // either successfully returned with a non-empty region or there
  // were no more regions to be claimed. The problem with this was
  // that, in certain circumstances, it iterated over large chunks of
  // the heap finding only empty regions and, while it was working, it
  // was preventing the calling task from calling its regular clock
  // method. So, this way, each task will spend very little time in
  // claim_region() and is allowed to call the regular clock method
  // frequently.
  HeapRegion* claim_region(int task);

  // It determines whether we've run out of regions to scan.
  bool        out_of_regions() { return _finger == _heap_end; }
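
  // Illustrative claim loop (not a method of this class): because
  // claim_region() may return NULL while unclaimed regions remain, a
  // task loops until out_of_regions() holds, giving its regular clock
  // method a chance to run between attempts:
  //
  //   while (!_cm->out_of_regions()) {
  //     HeapRegion* hr = _cm->claim_region(_task_id);
  //     if (hr != NULL) scan_region(hr);  // scan_region() is hypothetical
  //     regular_clock_call();             // keep the task responsive
  //   }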
  // Returns the task with the given id
  CMTask* task(int id) {
    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
               "active bounds" );
    return _tasks[id];
  }

  // Returns the task queue with the given id
  CMTaskQueue* task_queue(int id) {
    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
               "active bounds" );
    return (CMTaskQueue*) _task_queues->queue(id);
  }

  // Returns the task queue set
  CMTaskQueueSet* task_queues()  { return _task_queues; }

  // Access / manipulation of the overflow flag which is set to
  // indicate that the global stack or region stack has overflown
  bool has_overflown()           { return _has_overflown; }
  void set_has_overflown()       { _has_overflown = true; }
  void clear_has_overflown()     { _has_overflown = false; }

  bool has_aborted()             { return _has_aborted; }
  bool restart_for_overflow()    { return _restart_for_overflow; }

  // Methods to enter the two overflow sync barriers
  void enter_first_sync_barrier(int task_num);
  void enter_second_sync_barrier(int task_num);

public:
  // Manipulation of the global mark stack.
  // Notice that the first mark_stack_push is CAS-based, whereas the
  // two below are Mutex-based. This is OK since the first one is only
  // called during evacuation pauses and doesn't compete with the
  // other two (which are called by the marking tasks during
  // concurrent marking or remark).
  bool mark_stack_push(oop p) {
    _markStack.par_push(p);
    if (_markStack.overflow()) {
      set_has_overflown();
      return false;
    }
    return true;
  }
  bool mark_stack_push(oop* arr, int n) {
    _markStack.par_push_arr(arr, n);
    if (_markStack.overflow()) {
      set_has_overflown();
      return false;
    }
    return true;
  }
  void mark_stack_pop(oop* arr, int max, int* n) {
    _markStack.par_pop_arr(arr, max, n);
  }
  size_t mark_stack_size()              { return _markStack.size(); }
  size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
  bool mark_stack_overflow()            { return _markStack.overflow(); }
  bool mark_stack_empty()               { return _markStack.isEmpty(); }

  // Manipulation of the region stack
  bool region_stack_push(MemRegion mr) {
    _regionStack.push(mr);
    if (_regionStack.overflow()) {
      set_has_overflown();
      return false;
    }
    return true;
  }
  MemRegion region_stack_pop()          { return _regionStack.pop(); }
  int region_stack_size()               { return _regionStack.size(); }
  bool region_stack_overflow()          { return _regionStack.overflow(); }
  bool region_stack_empty()             { return _regionStack.isEmpty(); }

  bool concurrent_marking_in_progress() {
    return _concurrent_marking_in_progress;
  }
  void set_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = true;
  }
  void clear_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = false;
  }

  void update_accum_task_vtime(int i, double vtime) {
    _accum_task_vtime[i] += vtime;
  }

  double all_task_accum_vtime() {
    double ret = 0.0;
    for (int i = 0; i < (int)_max_task_num; ++i)
      ret += _accum_task_vtime[i];
    return ret;
  }

  // Attempts to steal an object from the task queues of other tasks
  bool try_stealing(int task_num, int* hash_seed, oop& obj) {
    return _task_queues->steal(task_num, hash_seed, obj);
  }
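
  // Usage sketch (illustrative only): a task whose local queue is empty
  // can keep trying to steal from its peers before entering termination:
  //
  //   oop obj;
  //   while (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  //     scan_object(obj);   // process each stolen entry
  //   }
  //   // nothing left to steal; attempt the termination protocol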
  // It grays an object by first marking it. Then, if it's behind the
  // global finger, it also pushes it on the global stack.
  void deal_with_reference(oop obj);

  ConcurrentMark(ReservedSpace rs, int max_regions);
  ~ConcurrentMark();
  ConcurrentMarkThread* cmThread() { return _cmThread; }

  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }

  // The following three are interaction between CM and
  // G1CollectedHeap

  // This notifies CM that a root during initial-mark needs to be
  // grayed and it's MT-safe. Currently, we just mark it. But, in the
  // future, we can experiment with pushing it on the stack and we can
  // do this without changing G1CollectedHeap.
  void grayRoot(oop p);
  // It's used during evacuation pauses to gray a region, if
  // necessary, and it's MT-safe. It assumes that the caller has
  // marked any objects on that region. If _should_gray_objects is
  // true and we're still doing concurrent marking, the region is
  // pushed on the region stack, if it is located below the global
  // finger, otherwise we do nothing.
  void grayRegionIfNecessary(MemRegion mr);
  // It's used during evacuation pauses to mark and, if necessary,
  // gray a single object and it's MT-safe. It assumes the caller did
  // not mark the object. If _should_gray_objects is true and we're
  // still doing concurrent marking, the object is pushed on the
  // global stack, if it is located below the global finger, otherwise
  // we do nothing.
  void markAndGrayObjectIfNecessary(oop p);

  // This iterates over the bitmap of the previous marking and prints
  // out all objects that are marked on the bitmap and indicates
  // whether what they point to is also marked or not.
  void print_prev_bitmap_reachable();

  // Clear the next marking bitmap (will be called concurrently).
  void clearNextBitmap();
  // main CMS steps and related support
  void checkpointRootsInitial();

  // These two do the work that needs to be done before and after the
  // initial root checkpoint. Since this checkpoint can be done at two
  // different points (i.e. an explicit pause or piggy-backed on a
  // young collection), then it's nice to be able to easily share the
  // pre/post code. It might be the case that we can put everything in
  // the post method. TP
  void checkpointRootsInitialPre();
  void checkpointRootsInitialPost();

  // Do concurrent phase of marking, to a tentative transitive closure.
  void markFromRoots();

  // Process all unprocessed SATB buffers. It is called at the
  // beginning of an evacuation pause.
  void drainAllSATBBuffers();

  void checkpointRootsFinal(bool clear_all_soft_refs);
  void checkpointRootsFinalWork();
  void calcDesiredRegions();
  void cleanup();
  void completeCleanup();

  // Mark in the previous bitmap.  NB: this is usually read-only, so use
  // this carefully!
  void markPrev(oop p);
  void clear(oop p);
  // Clears marks for all objects in the given range, for both prev and
  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
  // this carefully!
  void clearRangeBothMaps(MemRegion mr);

  // Record the current top of the mark and region stacks; a
  // subsequent oops_do() on the mark stack and
  // invalidate_entries_into_cset() on the region stack will iterate
  // only over indices valid at the time of this call.
  void set_oops_do_bound() {
    _markStack.set_oops_do_bound();
    _regionStack.set_oops_do_bound();
  }
  // Iterate over the oops in the mark stack and all local queues. It
  // also calls invalidate_entries_into_cset() on the region stack.
  void oops_do(OopClosure* f);
  // It is called at the end of an evacuation pause during marking so
  // that CM is notified of where the new end of the heap is. It
  // doesn't do anything if concurrent_marking_in_progress() is false,
  // unless the force parameter is true.
  void update_g1_committed(bool force = false);

  void complete_marking_in_collection_set();

  // It indicates that a new collection set is being chosen.
  void newCSet();
  // It registers a collection set heap region with CM. This is used
  // to determine whether any heap regions are located above the finger.
  void registerCSetRegion(HeapRegion* hr);

  // Returns "true" if at least one mark has been completed.
  bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }

  bool isMarked(oop p) const {
    assert(p != NULL && p->is_oop(), "expected an oop");
    HeapWord* addr = (HeapWord*)p;
    assert(addr >= _nextMarkBitMap->startWord() &&
           addr < _nextMarkBitMap->endWord(), "in a region");

    return _nextMarkBitMap->isMarked(addr);
  }
  inline bool not_yet_marked(oop p) const;

  // XXX Debug code
  bool containing_card_is_marked(void* p);
  bool containing_cards_are_marked(void* start, void* last);

  bool isPrevMarked(oop p) const {
    assert(p != NULL && p->is_oop(), "expected an oop");
    HeapWord* addr = (HeapWord*)p;
    assert(addr >= _prevMarkBitMap->startWord() &&
           addr < _prevMarkBitMap->endWord(), "in a region");

    return _prevMarkBitMap->isMarked(addr);
  }

  inline bool do_yield_check(int worker_i = 0);
  inline bool should_yield();

  // Called to abort the marking cycle after a Full GC takes place.
  void abort();

  void disable_co_trackers();

  // This prints the global/local fingers. It is used for debugging.
  NOT_PRODUCT(void print_finger();)

  void print_summary_info();

  // The following indicate whether a given verbose level has been
  // set. Notice that anything above stats is conditional on
  // _MARKING_VERBOSE_ having been set to 1
  bool verbose_stats()
    { return _verbose_level >= stats_verbose; }
  bool verbose_low()
    { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; }
  bool verbose_medium()
    { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; }
  bool verbose_high()
    { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; }
};
// A class representing a marking task.
class CMTask : public TerminatorTerminator {
private:
  enum PrivateConstants {
    // the regular clock call is called once the number of scanned
    // words reaches this limit
    words_scanned_period          = 12*1024,
    // the regular clock call is called once the number of visited
    // references reaches this limit
    refs_reached_period           = 384,
    // initial value for the hash seed, used in the work stealing code
    init_hash_seed                = 17,
    // how many entries will be transferred between global stack and
    // local queues
    global_stack_transfer_size    = 16
  };

  int                         _task_id;
  G1CollectedHeap*            _g1h;
  ConcurrentMark*             _cm;
  CMBitMap*                   _nextMarkBitMap;
  // the task queue of this task
  CMTaskQueue*                _task_queue;
private:
  // the task queue set---needed for stealing
  CMTaskQueueSet*             _task_queues;
  // indicates whether the task has been claimed---this is only for
  // debugging purposes
  bool                        _claimed;

  // number of calls to this task
  int                         _calls;

  // concurrent overhead over a single CPU for this task
  COTracker                   _co_tracker;

  // when the virtual timer reaches this time, the marking step should
  // exit
  double                      _time_target_ms;
  // the start time of the current marking step
  double                      _start_time_ms;

  // the oop closure used for iterations over oops
  OopClosure*                 _oop_closure;

  // the region this task is scanning, NULL if we're not scanning any
  HeapRegion*                 _curr_region;
  // the local finger of this task, NULL if we're not scanning a region
  HeapWord*                   _finger;
  // limit of the region this task is scanning, NULL if we're not scanning one
  HeapWord*                   _region_limit;

  // This is used only when we scan regions popped from the region
  // stack. It records the last object we scanned on such a region.
  // It is used to ensure that, if we abort region iteration, we do
  // not rescan the first part of the region. This should be NULL
  // when we're not scanning a region from the region stack.
  HeapWord*                   _region_finger;

  // the number of words this task has scanned
  size_t                      _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _words_scanned_limit;
  // the initial value of _words_scanned_limit (i.e. what it was
  // before it was decreased).
  size_t                      _real_words_scanned_limit;

  // the number of references this task has visited
  size_t                      _refs_reached;
  // When _refs_reached reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _refs_reached_limit;
  // the initial value of _refs_reached_limit (i.e. what it was before
  // it was decreased).
  size_t                      _real_refs_reached_limit;
  // used by the work stealing stuff
  int                         _hash_seed;
  // if this is true, then the task has aborted for some reason
  bool                        _has_aborted;
  // set when the task aborts because it has met its time quota
  bool                        _has_aborted_timed_out;
  // true when we're draining SATB buffers; this avoids the task
  // aborting due to SATB buffers being available (as we're already
  // dealing with them)
  bool                        _draining_satb_buffers;

  // number sequence of past step times
  NumberSeq                   _step_times_ms;
  // elapsed time of this task
  double                      _elapsed_time_ms;
  // termination time of this task
  double                      _termination_time_ms;
  // when this task got into the termination protocol
  double                      _termination_start_time_ms;

  // true when the task is in a concurrent phase, false when it is
  // in the remark phase (so, in the latter case, we do not have to
  // check all the things that we have to check during the concurrent
  // phase, i.e. SATB buffer availability...)
  bool                        _concurrent;

  TruncatedSeq                _marking_step_diffs_ms;

  // LOTS of statistics related to this task
#if _MARKING_STATS_
  NumberSeq                   _all_clock_intervals_ms;
  double                      _interval_start_time_ms;

  int                         _aborted;
  int                         _aborted_overflow;
  int                         _aborted_cm_aborted;
  int                         _aborted_yield;
  int                         _aborted_timed_out;
  int                         _aborted_satb;
  int                         _aborted_termination;

  int                         _steal_attempts;
  int                         _steals;

  int                         _clock_due_to_marking;
  int                         _clock_due_to_scanning;

  int                         _local_pushes;
  int                         _local_pops;
  int                         _local_max_size;
  int                         _objs_scanned;

  int                         _global_pushes;
  int                         _global_pops;
  int                         _global_max_size;

  int                         _global_transfers_to;
  int                         _global_transfers_from;

  int                         _region_stack_pops;

  int                         _regions_claimed;
  int                         _objs_found_on_bitmap;

  int                         _satb_buffers_processed;
#endif // _MARKING_STATS_

  // it updates the local fields after this task has claimed
  // a new region to scan
  void setup_for_region(HeapRegion* hr);
  // it brings up-to-date the limit of the region
  void update_region_limit();
  // it resets the local fields after a task has finished scanning a
  // region
  void giveup_current_region();

  // called when either the words scanned or the refs visited limit
  // has been reached
  void reached_limit();
  // recalculates the words scanned and refs visited limits
  void recalculate_limits();
  // decreases the words scanned and refs visited limits when we reach
  // an expensive operation
  void decrease_limits();
  // it checks whether the words scanned or refs visited reached their
  // respective limit and calls reached_limit() if they have
  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit)
      reached_limit();
  }
  // this is supposed to be called regularly during a marking step as
  // it checks a bunch of conditions that might cause the marking step
  // to abort
  void regular_clock_call();
  bool concurrent() { return _concurrent; }
public:
  // It resets the task; it should be called right at the beginning of
  // a marking phase.
  void reset(CMBitMap* _nextMarkBitMap);
  // it clears all the fields that correspond to a claimed region.
  void clear_region_fields();

  void set_concurrent(bool concurrent) { _concurrent = concurrent; }

  void enable_co_tracker() {
    guarantee( !_co_tracker.enabled(), "invariant" );
    _co_tracker.enable();
  }
  void disable_co_tracker() {
    guarantee( _co_tracker.enabled(), "invariant" );
    _co_tracker.disable();
  }
  bool co_tracker_enabled() {
    return _co_tracker.enabled();
  }
  void reset_co_tracker(double starting_conc_overhead = 0.0) {
    _co_tracker.reset(starting_conc_overhead);
  }
  void start_co_tracker() {
    _co_tracker.start();
  }
  void update_co_tracker(bool force_end = false) {
    _co_tracker.update(force_end);
  }

  // The main method of this class, which performs a marking step
  // trying not to exceed the given duration. However, it might exit
  // prematurely, under certain conditions (e.g. SATB buffers are
  // available for processing).
  void do_marking_step(double target_ms);
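
  // Usage sketch (illustrative; the 10ms slice is a made-up value): the
  // concurrent marking thread drives a task by invoking the step
  // repeatedly until marking finishes or the task aborts:
  //
  //   do {
  //     task->do_marking_step(10.0 /* target_ms */);
  //     // optionally sleep here to honour the marking overhead goal
  //   } while (!task->has_aborted() &&
  //            cm->concurrent_marking_in_progress());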
  // These two calls start and stop the timer
  void record_start_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0;
  }
  void record_end_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
  }

  // returns the task ID
  int task_id() { return _task_id; }

  // From TerminatorTerminator. It determines whether this task should
  // exit the termination protocol after it's entered it.
  virtual bool should_exit_termination();

  HeapWord* finger()            { return _finger; }

  bool has_aborted()            { return _has_aborted; }
  void set_has_aborted()        { _has_aborted = true; }
  void clear_has_aborted()      { _has_aborted = false; }
  bool claimed() { return _claimed; }

  void set_oop_closure(OopClosure* oop_closure) {
    _oop_closure = oop_closure;
  }

  // It grays the object by marking it and, if necessary, pushing it
  // on the local queue
  void deal_with_reference(oop obj);

  // It scans an object and visits its children.
  void scan_object(oop obj) {
    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
                      "invariant" );

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                             _task_id, (void*) obj);

    size_t obj_size = obj->size();
    _words_scanned += obj_size;

    obj->oop_iterate(_oop_closure);
    statsOnly( ++_objs_scanned );
    check_limits();
  }

  // It pushes an object on the local queue.
  void push(oop obj);

  // These two move entries to/from the global stack.
  void move_entries_to_global_stack();
  void get_entries_from_global_stack();

  // It pops and scans objects from the local queue. If partially is
  // true, then it stops when the queue size falls to a given limit. If
  // partially is false, then it stops when the queue is empty.
  void drain_local_queue(bool partially);
  // It moves entries from the global stack to the local queue and
  // drains the local queue. If partially is true, then it stops when
  // both the global stack and the local queue reach a given size. If
  // partially is false, it tries to empty them totally.
  void drain_global_stack(bool partially);
  // It keeps picking SATB buffers and processing them until no SATB
  // buffers are available.
  void drain_satb_buffers();
  // It keeps popping regions from the region stack and processing
  // them until the region stack is empty.
  void drain_region_stack(BitMapClosure* closure);

  // moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
                      "invariant" );
    _finger = new_finger;
  }

  // moves the region finger to a new location
  inline void move_region_finger_to(HeapWord* new_finger) {
    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
    _region_finger = new_finger;
  }

  CMTask(int task_num, ConcurrentMark *cm,
         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);

  // it prints statistics associated with this task
  void print_stats();

#if _MARKING_STATS_
  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
#endif // _MARKING_STATS_
};
