Thu, 17 Nov 2011 12:40:15 -0800
7112743: G1: Reduce overhead of marking closure during evacuation pauses
Summary: Parallelize the serial code that was used to mark objects reachable from survivor objects in the collection set. Some minor improvements in the timers used to track the freeing of the collection set, along with some tweaks to PrintGCDetails.
Reviewed-by: tonyp, brutisso
1 /*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
28 #include "gc_implementation/g1/heapRegionSets.hpp"
29 #include "utilities/taskqueue.hpp"
31 class G1CollectedHeap;
32 class CMTask;
33 typedef GenericTaskQueue<oop> CMTaskQueue;
34 typedef GenericTaskQueueSet<CMTaskQueue> CMTaskQueueSet;
36 // Closure used by CM during concurrent reference discovery
37 // and reference processing (during remarking) to determine
38 // if a particular object is alive. It is primarily used
39 // to determine if referents of discovered reference objects
40 // are alive. An instance is also embedded into the
41 // reference processor as the _is_alive_non_header field
42 class G1CMIsAliveClosure: public BoolObjectClosure {
43 G1CollectedHeap* _g1;
44 public:
45 G1CMIsAliveClosure(G1CollectedHeap* g1) :
46 _g1(g1)
47 {}
49 void do_object(oop obj) {
50 ShouldNotCallThis();
51 }
52 bool do_object_b(oop obj);
53 };
55 // A generic CM bit map. This is essentially a wrapper around the BitMap
56 // class, with one bit per (1<<_shifter) HeapWords.
58 class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
59 protected:
60 HeapWord* _bmStartWord; // base address of range covered by map
61 size_t _bmWordSize; // map size (in #HeapWords covered)
62 const int _shifter; // map to char or bit
63 VirtualSpace _virtual_space; // underlying memory for the bit map
64 BitMap _bm; // the bit map itself
66 public:
67 // constructor
68 CMBitMapRO(ReservedSpace rs, int shifter);
70 enum { do_yield = true };
72 // inquiries
73 HeapWord* startWord() const { return _bmStartWord; }
74 size_t sizeInWords() const { return _bmWordSize; }
75 // the following is one past the last word in space
76 HeapWord* endWord() const { return _bmStartWord + _bmWordSize; }
78 // read marks
80 bool isMarked(HeapWord* addr) const {
81 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
82 "outside underlying space?");
83 return _bm.at(heapWordToOffset(addr));
84 }
86 // iteration
87 bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
88 bool iterate(BitMapClosure* cl, MemRegion mr);
90 // Return the address corresponding to the next marked bit at or after
91 // "addr", and before "limit", if "limit" is non-NULL. If there is no
92 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
93 HeapWord* getNextMarkedWordAddress(HeapWord* addr,
94 HeapWord* limit = NULL) const;
95 // Return the address corresponding to the next unmarked bit at or after
96 // "addr", and before "limit", if "limit" is non-NULL. If there is no
97 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
98 HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
99 HeapWord* limit = NULL) const;
101 // conversion utilities
102 // XXX Fix these so that offsets are size_t's...
103 HeapWord* offsetToHeapWord(size_t offset) const {
104 return _bmStartWord + (offset << _shifter);
105 }
106 size_t heapWordToOffset(HeapWord* addr) const {
107 return pointer_delta(addr, _bmStartWord) >> _shifter;
108 }
109 int heapWordDiffToOffsetDiff(size_t diff) const;
110 HeapWord* nextWord(HeapWord* addr) {
111 return offsetToHeapWord(heapWordToOffset(addr) + 1);
112 }
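// A worked example of the address <-> offset mapping above (illustrative
// only; the base address S and shift k are assumed values, not taken from
// this file): with _bmStartWord == S and _shifter == 0, the word at
// address S + 17 maps to bit offset 17 via heapWordToOffset(), and
// offsetToHeapWord(17) maps back to S + 17. With _shifter == k, each bit
// covers (1 << k) HeapWords, so heapWordToOffset(S + 17) == 17 >> k and
// offsetToHeapWord() returns the first HeapWord covered by that bit.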
114 void mostly_disjoint_range_union(BitMap* from_bitmap,
115 size_t from_start_index,
116 HeapWord* to_start_word,
117 size_t word_num);
119 // debugging
120 NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
121 };
123 class CMBitMap : public CMBitMapRO {
125 public:
126 // constructor
127 CMBitMap(ReservedSpace rs, int shifter) :
128 CMBitMapRO(rs, shifter) {}
130 // write marks
131 void mark(HeapWord* addr) {
132 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
133 "outside underlying space?");
134 _bm.set_bit(heapWordToOffset(addr));
135 }
136 void clear(HeapWord* addr) {
137 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
138 "outside underlying space?");
139 _bm.clear_bit(heapWordToOffset(addr));
140 }
141 bool parMark(HeapWord* addr) {
142 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
143 "outside underlying space?");
144 return _bm.par_set_bit(heapWordToOffset(addr));
145 }
146 bool parClear(HeapWord* addr) {
147 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
148 "outside underlying space?");
149 return _bm.par_clear_bit(heapWordToOffset(addr));
150 }
151 void markRange(MemRegion mr);
152 void clearAll();
153 void clearRange(MemRegion mr);
155 // Starting at the bit corresponding to "addr" (inclusive), find the next
156 // "1" bit, if any. This bit starts some run of consecutive "1"'s; find
157 // the end of this run (stopping at "end_addr"). Return the MemRegion
158 // covering from the start of the region corresponding to the first bit
159 // of the run to the end of the region corresponding to the last bit of
160 // the run. If there is no "1" bit at or after "addr", return an empty
161 // MemRegion.
162 MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
163 };
165 // Represents a marking stack used by the CM collector.
166 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
167 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
168 ConcurrentMark* _cm;
169 oop* _base; // bottom of stack
170 jint _index; // one more than last occupied index
171 jint _capacity; // max #elements
172 jint _oops_do_bound; // Number of elements to include in next iteration.
173 NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run
175 bool _overflow;
176 DEBUG_ONLY(bool _drain_in_progress;)
177 DEBUG_ONLY(bool _drain_in_progress_yields;)
179 public:
180 CMMarkStack(ConcurrentMark* cm);
181 ~CMMarkStack();
183 void allocate(size_t size);
185 oop pop() {
186 if (!isEmpty()) {
187 return _base[--_index];
188 }
189 return NULL;
190 }
192 // If overflow happens, don't do the push, and record the overflow.
193 // *Requires* that "ptr" is already marked.
194 void push(oop ptr) {
195 if (isFull()) {
196 // Record overflow.
197 _overflow = true;
198 return;
199 } else {
200 _base[_index++] = ptr;
201 NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
202 }
203 }
204 // Non-block impl. Note: concurrency is allowed only with other
205 // "par_push" operations, not with "pop" or "drain". We would need
206 // parallel versions of them if such concurrency was desired.
207 void par_push(oop ptr);
209 // Pushes the first "n" elements of "ptr_arr" on the stack.
210 // Non-block impl. Note: concurrency is allowed only with other
211 // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
212 void par_adjoin_arr(oop* ptr_arr, int n);
214 // Pushes the first "n" elements of "ptr_arr" on the stack.
215 // Locking impl: concurrency is allowed only with
216 // "par_push_arr" and/or "par_pop_arr" operations, which use the same
217 // locking strategy.
218 void par_push_arr(oop* ptr_arr, int n);
220 // If it returns false, the array was empty. Otherwise, removes up to "max"
221 // elements from the stack, and transfers them to "ptr_arr" in an
222 // unspecified order. The actual number transferred is given in "n" ("n
223 // == 0" is deliberately redundant with the return value.) Locking impl:
224 // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
225 // operations, which use the same locking strategy.
226 bool par_pop_arr(oop* ptr_arr, int max, int* n);
228 // Drain the mark stack, applying the given closure to all fields of
229 // objects on the stack. (That is, continue until the stack is empty,
230 // even if closure applications add entries to the stack.) The "bm"
231 // argument, if non-null, may be used to verify that only marked objects
232 // are on the mark stack. If "yield_after" is "true", then the
233 // concurrent marker performing the drain offers to yield after
234 // processing each object. If a yield occurs, stops the drain operation
235 // and returns false. Otherwise, returns true.
236 template<class OopClosureClass>
237 bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
239 bool isEmpty() { return _index == 0; }
240 bool isFull() { return _index == _capacity; }
241 int maxElems() { return _capacity; }
243 bool overflow() { return _overflow; }
244 void clear_overflow() { _overflow = false; }
246 int size() { return _index; }
248 void setEmpty() { _index = 0; clear_overflow(); }
250 // Record the current size; a subsequent "oops_do" will iterate only over
251 // indices valid at the time of this call.
252 void set_oops_do_bound(jint bound = -1) {
253 if (bound == -1) {
254 _oops_do_bound = _index;
255 } else {
256 _oops_do_bound = bound;
257 }
258 }
259 jint oops_do_bound() { return _oops_do_bound; }
260 // iterate over the oops in the mark stack, up to the bound recorded via
261 // the call above.
262 void oops_do(OopClosure* f);
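// Illustrative use of the bound mechanism above (a sketch, not code from
// this file; "stack" and the closure "cl" are placeholders):
//   stack->set_oops_do_bound();  // record the current size
//   ...                          // concurrent pushes may still happen
//   stack->oops_do(&cl);         // visits only indices valid at the
//                                // time the bound was recorded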
263 };
265 class CMRegionStack VALUE_OBJ_CLASS_SPEC {
266 MemRegion* _base;
267 jint _capacity;
268 jint _index;
269 jint _oops_do_bound;
270 bool _overflow;
271 public:
272 CMRegionStack();
273 ~CMRegionStack();
274 void allocate(size_t size);
276 // This is lock-free; assumes that it will only be called in parallel
277 // with other "push" operations (no pops).
278 void push_lock_free(MemRegion mr);
280 // Lock-free; assumes that it will only be called in parallel
281 // with other "pop" operations (no pushes).
282 MemRegion pop_lock_free();
284 #if 0
285 // The routines that manipulate the region stack with a lock are
286 // not currently used. They should be retained, however, as a
287 // diagnostic aid.
289 // These two are the implementations that use a lock. They can be
290 // called concurrently with each other but they should not be called
291 // concurrently with the lock-free versions (push() / pop()).
292 void push_with_lock(MemRegion mr);
293 MemRegion pop_with_lock();
294 #endif
296 bool isEmpty() { return _index == 0; }
297 bool isFull() { return _index == _capacity; }
299 bool overflow() { return _overflow; }
300 void clear_overflow() { _overflow = false; }
302 int size() { return _index; }
304 // It iterates over the entries in the region stack and it
305 // invalidates (i.e. assigns MemRegion()) the ones that point to
306 // regions in the collection set.
307 bool invalidate_entries_into_cset();
309 // This gives an upper bound on how far the iteration in
310 // invalidate_entries_into_cset() will reach. This prevents
311 // newly-added entries from being unnecessarily scanned.
312 void set_oops_do_bound() {
313 _oops_do_bound = _index;
314 }
316 void setEmpty() { _index = 0; clear_overflow(); }
317 };
319 class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
320 private:
321 #ifndef PRODUCT
322 uintx _num_remaining;
323 bool _force;
324 #endif // !defined(PRODUCT)
326 public:
327 void init() PRODUCT_RETURN;
328 void update() PRODUCT_RETURN;
329 bool should_force() PRODUCT_RETURN_( return false; );
330 };
332 // this will enable a variety of different statistics per GC task
333 #define _MARKING_STATS_ 0
334 // this will enable the higher verbose levels
335 #define _MARKING_VERBOSE_ 0
337 #if _MARKING_STATS_
338 #define statsOnly(statement) \
339 do { \
340 statement ; \
341 } while (0)
342 #else // _MARKING_STATS_
343 #define statsOnly(statement) \
344 do { \
345 } while (0)
346 #endif // _MARKING_STATS_
348 typedef enum {
349 no_verbose = 0, // verbose turned off
350 stats_verbose, // only prints stats at the end of marking
351 low_verbose, // low verbose, mostly per region and per major event
352 medium_verbose, // a bit more detailed than low
353 high_verbose // per object verbose
354 } CMVerboseLevel;
357 class ConcurrentMarkThread;
359 class ConcurrentMark: public CHeapObj {
360 friend class ConcurrentMarkThread;
361 friend class CMTask;
362 friend class CMBitMapClosure;
363 friend class CSetMarkOopClosure;
364 friend class CMGlobalObjectClosure;
365 friend class CMRemarkTask;
366 friend class CMConcurrentMarkingTask;
367 friend class G1ParNoteEndTask;
368 friend class CalcLiveObjectsClosure;
369 friend class G1CMRefProcTaskProxy;
370 friend class G1CMRefProcTaskExecutor;
371 friend class G1CMParKeepAliveAndDrainClosure;
372 friend class G1CMParDrainMarkingStackClosure;
374 protected:
375 ConcurrentMarkThread* _cmThread; // the thread doing the work
376 G1CollectedHeap* _g1h; // the heap.
377 size_t _parallel_marking_threads; // the number of marking
378 // threads we're using
379 size_t _max_parallel_marking_threads; // max number of marking
380 // threads we'll ever use
381 double _sleep_factor; // how much we have to sleep, with
382 // respect to the work we just did, to
383 // meet the marking overhead goal
384 double _marking_task_overhead; // marking target overhead for
385 // a single task
387 // same as the two above, but for the cleanup task
388 double _cleanup_sleep_factor;
389 double _cleanup_task_overhead;
391 FreeRegionList _cleanup_list;
393 // Concurrent marking support structures
394 CMBitMap _markBitMap1;
395 CMBitMap _markBitMap2;
396 CMBitMapRO* _prevMarkBitMap; // completed mark bitmap
397 CMBitMap* _nextMarkBitMap; // under-construction mark bitmap
398 bool _at_least_one_mark_complete;
400 BitMap _region_bm;
401 BitMap _card_bm;
403 // Heap bounds
404 HeapWord* _heap_start;
405 HeapWord* _heap_end;
407 // For gray objects
408 CMMarkStack _markStack; // Grey objects behind global finger.
409 CMRegionStack _regionStack; // Grey regions behind global finger.
410 HeapWord* volatile _finger; // the global finger, region aligned,
411 // always points to the end of the
412 // last claimed region
414 // marking tasks
415 size_t _max_task_num; // maximum task number
416 size_t _active_tasks; // task num currently active
417 CMTask** _tasks; // task queue array (max_task_num len)
418 CMTaskQueueSet* _task_queues; // task queue set
419 ParallelTaskTerminator _terminator; // for termination
421 // Two sync barriers that are used to synchronise tasks when an
422 // overflow occurs. The algorithm is the following. All tasks enter
423 // the first one to ensure that they have all stopped manipulating
424 // the global data structures. After they exit it, they re-initialise
425 // their data structures and task 0 re-initialises the global data
426 // structures. Then, they enter the second sync barrier. This
427 // ensures that no task starts doing work before all data
428 // structures (local and global) have been re-initialised. When they
429 // exit it, they are free to start working again.
430 WorkGangBarrierSync _first_overflow_barrier_sync;
431 WorkGangBarrierSync _second_overflow_barrier_sync;
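// A sketch of the overflow protocol described above (illustrative
// pseudocode, not code from this file; the barrier entry points named
// here are the ones declared by this class):
//   enter_first_sync_barrier(task_num);   // all tasks stop touching
//                                         // the global data structures
//   // each task re-initialises its local data structures;
//   // task 0 also re-initialises the global ones
//   enter_second_sync_barrier(task_num);  // no task resumes work before
//                                         // re-initialisation is done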
434 // this is set by any task, when an overflow on the global data
435 // structures is detected.
436 volatile bool _has_overflown;
437 // true: marking is concurrent, false: we're in remark
438 volatile bool _concurrent;
439 // set at the end of a Full GC so that marking aborts
440 volatile bool _has_aborted;
442 // used when remark aborts due to an overflow to indicate that
443 // another concurrent marking phase should start
444 volatile bool _restart_for_overflow;
446 // This is true from the very start of concurrent marking until the
447 // point when all the tasks complete their work. It is really used
448 // to determine the points between the end of concurrent marking and
449 // time of remark.
450 volatile bool _concurrent_marking_in_progress;
452 // verbose level
453 CMVerboseLevel _verbose_level;
455 // These two fields are used to implement the optimisation that
456 // avoids pushing objects on the global/region stack if there are
457 // no collection set regions above the lowest finger.
459 // This is the lowest finger (among the global and local fingers),
460 // which is calculated before a new collection set is chosen.
461 HeapWord* _min_finger;
462 // If this flag is true, objects/regions that are marked below the
463 // finger should be pushed on the stack(s). If this flag is
464 // false, it is safe not to push them on the stack(s).
465 bool _should_gray_objects;
467 // All of these times are in ms.
468 NumberSeq _init_times;
469 NumberSeq _remark_times;
470 NumberSeq _remark_mark_times;
471 NumberSeq _remark_weak_ref_times;
472 NumberSeq _cleanup_times;
473 double _total_counting_time;
474 double _total_rs_scrub_time;
476 double* _accum_task_vtime; // accumulated task vtime
478 FlexibleWorkGang* _parallel_workers;
480 ForceOverflowSettings _force_overflow_conc;
481 ForceOverflowSettings _force_overflow_stw;
483 void weakRefsWork(bool clear_all_soft_refs);
485 void swapMarkBitMaps();
487 // It resets the global marking data structures, as well as the
488 // task local ones; should be called during initial mark.
489 void reset();
490 // It resets all the marking data structures.
491 void clear_marking_state(bool clear_overflow = true);
493 // It should be called to indicate which phase we're in (concurrent
494 // mark or remark) and how many threads are currently active.
495 void set_phase(size_t active_tasks, bool concurrent);
496 // We do this after we're done with marking so that the marking data
497 // structures are initialised to a sensible and predictable state.
498 void set_non_marking_state();
500 // prints all gathered CM-related statistics
501 void print_stats();
503 bool cleanup_list_is_empty() {
504 return _cleanup_list.is_empty();
505 }
507 // accessor methods
508 size_t parallel_marking_threads() { return _parallel_marking_threads; }
509 size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
510 double sleep_factor() { return _sleep_factor; }
511 double marking_task_overhead() { return _marking_task_overhead;}
512 double cleanup_sleep_factor() { return _cleanup_sleep_factor; }
513 double cleanup_task_overhead() { return _cleanup_task_overhead;}
515 HeapWord* finger() { return _finger; }
516 bool concurrent() { return _concurrent; }
517 size_t active_tasks() { return _active_tasks; }
518 ParallelTaskTerminator* terminator() { return &_terminator; }
520 // It claims the next available region to be scanned by a marking
521 // task. It might return NULL if the next region is empty or we have
522 // run out of regions. In the latter case, out_of_regions()
523 // determines whether we've really run out of regions or the task
524 // should call claim_region() again. This might seem a bit
525 // awkward. Originally, the code was written so that claim_region()
526 // either successfully returned with a non-empty region or there
527 // were no more regions to be claimed. The problem with this was
528 // that, in certain circumstances, it iterated over large chunks of
529 // the heap finding only empty regions and, while it was working, it
530 // was preventing the calling task from calling its regular clock
531 // method. So, this way, each task will spend very little time in
532 // claim_region() and is allowed to call the regular clock method
533 // frequently.
534 HeapRegion* claim_region(int task);
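// Illustrative claiming loop (a sketch under the contract described
// above, not code from this file; task_id is a placeholder): a marking
// task keeps calling claim_region() until it either gets a region or
// has genuinely run out of regions:
//   HeapRegion* hr = claim_region(task_id);
//   while (hr == NULL && !out_of_regions()) {
//     // NULL may simply mean the claimed region was empty; let the
//     // task run its regular clock method, then try again
//     hr = claim_region(task_id);
//   }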
536 // It determines whether we've run out of regions to scan.
537 bool out_of_regions() { return _finger == _heap_end; }
539 // Returns the task with the given id
540 CMTask* task(int id) {
541 assert(0 <= id && id < (int) _active_tasks,
542 "task id not within active bounds");
543 return _tasks[id];
544 }
546 // Returns the task queue with the given id
547 CMTaskQueue* task_queue(int id) {
548 assert(0 <= id && id < (int) _active_tasks,
549 "task queue id not within active bounds");
550 return (CMTaskQueue*) _task_queues->queue(id);
551 }
553 // Returns the task queue set
554 CMTaskQueueSet* task_queues() { return _task_queues; }
556 // Access / manipulation of the overflow flag which is set to
557 // indicate that the global stack or region stack has overflown
558 bool has_overflown() { return _has_overflown; }
559 void set_has_overflown() { _has_overflown = true; }
560 void clear_has_overflown() { _has_overflown = false; }
562 bool has_aborted() { return _has_aborted; }
563 bool restart_for_overflow() { return _restart_for_overflow; }
565 // Methods to enter the two overflow sync barriers
566 void enter_first_sync_barrier(int task_num);
567 void enter_second_sync_barrier(int task_num);
569 ForceOverflowSettings* force_overflow_conc() {
570 return &_force_overflow_conc;
571 }
573 ForceOverflowSettings* force_overflow_stw() {
574 return &_force_overflow_stw;
575 }
577 ForceOverflowSettings* force_overflow() {
578 if (concurrent()) {
579 return force_overflow_conc();
580 } else {
581 return force_overflow_stw();
582 }
583 }
585 public:
586 // Manipulation of the global mark stack.
587 // Notice that the first mark_stack_push is CAS-based, whereas the
588 // two below are Mutex-based. This is OK since the first one is only
589 // called during evacuation pauses and doesn't compete with the
590 // other two (which are called by the marking tasks during
591 // concurrent marking or remark).
592 bool mark_stack_push(oop p) {
593 _markStack.par_push(p);
594 if (_markStack.overflow()) {
595 set_has_overflown();
596 return false;
597 }
598 return true;
599 }
600 bool mark_stack_push(oop* arr, int n) {
601 _markStack.par_push_arr(arr, n);
602 if (_markStack.overflow()) {
603 set_has_overflown();
604 return false;
605 }
606 return true;
607 }
608 void mark_stack_pop(oop* arr, int max, int* n) {
609 _markStack.par_pop_arr(arr, max, n);
610 }
611 size_t mark_stack_size() { return _markStack.size(); }
612 size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
613 bool mark_stack_overflow() { return _markStack.overflow(); }
614 bool mark_stack_empty() { return _markStack.isEmpty(); }
616 // (Lock-free) Manipulation of the region stack
617 bool region_stack_push_lock_free(MemRegion mr) {
618 // Currently we only call the lock-free version during evacuation
619 // pauses.
620 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
622 _regionStack.push_lock_free(mr);
623 if (_regionStack.overflow()) {
624 set_has_overflown();
625 return false;
626 }
627 return true;
628 }
630 // Lock-free version of region-stack pop. Should only be
631 // called in tandem with other lock-free pops.
632 MemRegion region_stack_pop_lock_free() {
633 return _regionStack.pop_lock_free();
634 }
636 #if 0
637 // The routines that manipulate the region stack with a lock are
638 // not currently used. They should be retained, however, as a
639 // diagnostic aid.
641 bool region_stack_push_with_lock(MemRegion mr) {
642 // Currently we only call the lock-based version during either
643 // concurrent marking or remark.
644 assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
645 "if we are at a safepoint it should be the remark safepoint");
647 _regionStack.push_with_lock(mr);
648 if (_regionStack.overflow()) {
649 set_has_overflown();
650 return false;
651 }
652 return true;
653 }
655 MemRegion region_stack_pop_with_lock() {
656 // Currently we only call the lock-based version during either
657 // concurrent marking or remark.
658 assert(!SafepointSynchronize::is_at_safepoint() || !concurrent(),
659 "if we are at a safepoint it should be the remark safepoint");
661 return _regionStack.pop_with_lock();
662 }
663 #endif
665 int region_stack_size() { return _regionStack.size(); }
666 bool region_stack_overflow() { return _regionStack.overflow(); }
667 bool region_stack_empty() { return _regionStack.isEmpty(); }
669 // Iterate over any regions that were aborted while draining the
670 // region stack (any such regions are saved in the corresponding
671 // CMTask) and invalidate (i.e. assign to the empty MemRegion())
672 // any regions that point into the collection set.
673 bool invalidate_aborted_regions_in_cset();
675 // Returns true if there are any aborted memory regions.
676 bool has_aborted_regions();
678 bool concurrent_marking_in_progress() {
679 return _concurrent_marking_in_progress;
680 }
681 void set_concurrent_marking_in_progress() {
682 _concurrent_marking_in_progress = true;
683 }
684 void clear_concurrent_marking_in_progress() {
685 _concurrent_marking_in_progress = false;
686 }
688 void update_accum_task_vtime(int i, double vtime) {
689 _accum_task_vtime[i] += vtime;
690 }
692 double all_task_accum_vtime() {
693 double ret = 0.0;
694 for (int i = 0; i < (int)_max_task_num; ++i)
695 ret += _accum_task_vtime[i];
696 return ret;
697 }
699 // Attempts to steal an object from the task queues of other tasks
700 bool try_stealing(int task_num, int* hash_seed, oop& obj) {
701 return _task_queues->steal(task_num, hash_seed, obj);
702 }
704 // It grays an object by first marking it. Then, if it's behind the
705 // global finger, it also pushes it on the global stack.
706 void deal_with_reference(oop obj);
708 ConcurrentMark(ReservedSpace rs, int max_regions);
709 ~ConcurrentMark();
710 ConcurrentMarkThread* cmThread() { return _cmThread; }
712 CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
713 CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; }
715 // Returns the number of GC threads to be used in a concurrent
716 // phase based on the number of GC threads being used in a STW
717 // phase.
718 size_t scale_parallel_threads(size_t n_par_threads);
720 // Calculates the number of GC threads to be used in a concurrent phase.
721 int calc_parallel_marking_threads();
723 // The following three methods handle the interaction between CM and
724 // G1CollectedHeap.
726 // This notifies CM that a root during initial-mark needs to be
727 // grayed and it's MT-safe. Currently, we just mark it. But, in the
728 // future, we can experiment with pushing it on the stack and we can
729 // do this without changing G1CollectedHeap.
730 void grayRoot(oop p);
731 // It's used during evacuation pauses to gray a region, if
732 // necessary, and it's MT-safe. It assumes that the caller has
733 // marked any objects on that region. If _should_gray_objects is
734 // true and we're still doing concurrent marking, the region is
735 // pushed on the region stack, if it is located below the global
736 // finger, otherwise we do nothing.
737 void grayRegionIfNecessary(MemRegion mr);
738 // It's used during evacuation pauses to mark and, if necessary,
739 // gray a single object and it's MT-safe. It assumes the caller did
740 // not mark the object. If _should_gray_objects is true and we're
741 // still doing concurrent marking, the object is pushed on the
742 // global stack, if it is located below the global finger, otherwise
743 // we do nothing.
744 void markAndGrayObjectIfNecessary(oop p);
746 // It iterates over the heap and for each object it comes across it
747 // will dump the contents of its reference fields, as well as
748 // liveness information for the object and its referents. The dump
749 // will be written to a file with the following name:
750 // G1PrintReachableBaseFile + "." + str.
751 // vo decides whether the prev (vo == UsePrevMarking), the next
752 // (vo == UseNextMarking) marking information, or the mark word
753 // (vo == UseMarkWord) will be used to determine the liveness of
754 // each object / referent.
755 // If all is true, all objects in the heap will be dumped, otherwise
756 // only the live ones. In the dump the following symbols / abbreviations
757 // are used:
758 // M : an explicitly live object (its bitmap bit is set)
759 // > : an implicitly live object (over tams)
760 // O : an object outside the G1 heap (typically: in the perm gen)
761 // NOT : a reference field whose referent is not live
762 // AND MARKED : indicates that an object is both explicitly and
763 // implicitly live (it should be one or the other, not both)
764 void print_reachable(const char* str,
765 VerifyOption vo, bool all) PRODUCT_RETURN;
767 // Clear the next marking bitmap (will be called concurrently).
768 void clearNextBitmap();
770 // These two do the work that needs to be done before and after the
771 // initial root checkpoint. Since this checkpoint can be done at two
772 // different points (i.e. an explicit pause or piggy-backed on a
773 // young collection), it's nice to be able to easily share the
774 // pre/post code. It might be the case that we can put everything in
775 // the post method. TP
776 void checkpointRootsInitialPre();
777 void checkpointRootsInitialPost();
779 // Do concurrent phase of marking, to a tentative transitive closure.
780 void markFromRoots();
782 // Process all unprocessed SATB buffers. It is called at the
783 // beginning of an evacuation pause.
784 void drainAllSATBBuffers();
786 void checkpointRootsFinal(bool clear_all_soft_refs);
787 void checkpointRootsFinalWork();
788 void calcDesiredRegions();
789 void cleanup();
790 void completeCleanup();
792 // Mark in the previous bitmap. NB: this is usually read-only, so use
793 // this carefully!
794 void markPrev(oop p);
795 void clear(oop p);
796 // Clears marks for all objects in the given range, for both prev and
797 // next bitmaps. NB: the previous bitmap is usually read-only, so use
798 // this carefully!
799 void clearRangeBothMaps(MemRegion mr);
801 // Record the current top of the mark and region stacks; a
802 // subsequent oops_do() on the mark stack and
803 // invalidate_entries_into_cset() on the region stack will iterate
804 // only over indices valid at the time of this call.
805 void set_oops_do_bound() {
806 _markStack.set_oops_do_bound();
807 _regionStack.set_oops_do_bound();
808 }
809 // Iterate over the oops in the mark stack and all local queues. It
810 // also calls invalidate_entries_into_cset() on the region stack.
811 void oops_do(OopClosure* f);
812 // It is called at the end of an evacuation pause during marking so
813 // that CM is notified of where the new end of the heap is. It
814 // doesn't do anything if concurrent_marking_in_progress() is false,
815 // unless the force parameter is true.
816 void update_g1_committed(bool force = false);
818 void complete_marking_in_collection_set();
820 // It indicates that a new collection set is being chosen.
821 void newCSet();
823 // It registers a collection set heap region with CM. This is used
824 // to determine whether any heap regions are located above the finger.
825 void registerCSetRegion(HeapRegion* hr);
827 // Resets the region fields of any active CMTask whose region fields
828 // are in the collection set (i.e. the region currently claimed by
829 // the CMTask will be evacuated and may be used, subsequently, as
830 // an alloc region). When this happens the region fields in the CMTask
831 // are stale and, hence, should be cleared causing the worker thread
832 // to claim a new region.
833 void reset_active_task_region_fields_in_cset();
835 // Registers the maximum region-end associated with a set of
836 // regions with CM. Again this is used to determine whether any
837 // heap regions are located above the finger.
838 void register_collection_set_finger(HeapWord* max_finger) {
839 // max_finger is the highest heap region end of the regions currently
840 // contained in the collection set. If this value is larger than
841 // _min_finger then we need to gray objects.
842 // This routine is like registerCSetRegion but for an entire
843 // collection of regions.
844 if (max_finger > _min_finger) {
845 _should_gray_objects = true;
846 }
847 }
849 // Returns "true" if at least one mark has been completed.
850 bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }
852 bool isMarked(oop p) const {
853 assert(p != NULL && p->is_oop(), "expected an oop");
854 HeapWord* addr = (HeapWord*)p;
855 assert(addr >= _nextMarkBitMap->startWord() &&
856 addr < _nextMarkBitMap->endWord(), "in a region");
858 return _nextMarkBitMap->isMarked(addr);
859 }
861 inline bool not_yet_marked(oop p) const;
863 // XXX Debug code
864 bool containing_card_is_marked(void* p);
865 bool containing_cards_are_marked(void* start, void* last);
867 bool isPrevMarked(oop p) const {
868 assert(p != NULL && p->is_oop(), "expected an oop");
869 HeapWord* addr = (HeapWord*)p;
870 assert(addr >= _prevMarkBitMap->startWord() &&
871 addr < _prevMarkBitMap->endWord(), "in a region");
873 return _prevMarkBitMap->isMarked(addr);
874 }
876 inline bool do_yield_check(int worker_i = 0);
877 inline bool should_yield();
879 // Called to abort the marking cycle after a Full GC takes place.
880 void abort();
882 // This prints the global/local fingers. It is used for debugging.
883 NOT_PRODUCT(void print_finger();)
885 void print_summary_info();
887 void print_worker_threads_on(outputStream* st) const;
889 // The following indicate whether a given verbose level has been
890 // set. Notice that anything above stats is conditional on
891 // _MARKING_VERBOSE_ having been set to 1
892 bool verbose_stats() {
893 return _verbose_level >= stats_verbose;
894 }
895 bool verbose_low() {
896 return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
897 }
898 bool verbose_medium() {
899 return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
900 }
901 bool verbose_high() {
902 return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
903 }
904 };
906 // A class representing a marking task.
907 class CMTask : public TerminatorTerminator {
908 private:
909 enum PrivateConstants {
910 // the regular clock call is called once the number of scanned
911 // words reaches this limit
912 words_scanned_period = 12*1024,
913 // the regular clock call is called once the number of visited
914 // references reaches this limit
915 refs_reached_period = 384,
916 // initial value for the hash seed, used in the work stealing code
917 init_hash_seed = 17,
918 // how many entries will be transferred between global stack and
919 // local queues
920 global_stack_transfer_size = 16
921 };
923 int _task_id;
924 G1CollectedHeap* _g1h;
925 ConcurrentMark* _cm;
926 CMBitMap* _nextMarkBitMap;
927 // the task queue of this task
928 CMTaskQueue* _task_queue;
929 private:
930 // the task queue set---needed for stealing
931 CMTaskQueueSet* _task_queues;
932 // indicates whether the task has been claimed---this is only for
933 // debugging purposes
934 bool _claimed;
936 // number of calls to this task
937 int _calls;
939 // when the virtual timer reaches this time, the marking step should
940 // exit
941 double _time_target_ms;
942 // the start time of the current marking step
943 double _start_time_ms;
945 // the oop closure used for iterations over oops
946 G1CMOopClosure* _cm_oop_closure;
948 // the region this task is scanning, NULL if we're not scanning any
949 HeapRegion* _curr_region;
950 // the local finger of this task, NULL if we're not scanning a region
951 HeapWord* _finger;
952 // limit of the region this task is scanning, NULL if we're not scanning one
953 HeapWord* _region_limit;
955 // This is used only when we scan regions popped from the region
956 // stack. It records the last object we scanned on such a region.
957 // It is used to ensure that, if we abort region
958 // iteration, we do not rescan the first part of the region. This
959 // should be NULL when we're not scanning a region from the region
960 // stack.
961 HeapWord* _region_finger;
963 // If we abort while scanning a region we record the remaining
964 // unscanned portion and check this field when marking restarts.
965 // This avoids having to push on the region stack while other
966 // marking threads may still be popping regions.
967 // If we were to push the unscanned portion directly to the
968 // region stack then we would need to use locking versions
969 // of the push and pop operations.
970 MemRegion _aborted_region;
972 // the number of words this task has scanned
973 size_t _words_scanned;
974 // When _words_scanned reaches this limit, the regular clock is
975 // called. Notice that this might be decreased under certain
976 // circumstances (i.e. when we believe that we did an expensive
977 // operation).
978 size_t _words_scanned_limit;
979 // the initial value of _words_scanned_limit (i.e. what it was
980 // before it was decreased).
981 size_t _real_words_scanned_limit;
983 // the number of references this task has visited
984 size_t _refs_reached;
985 // When _refs_reached reaches this limit, the regular clock is
986 // called. Notice that this might be decreased under certain
987 // circumstances (i.e. when we believe that we did an expensive
988 // operation).
989 size_t _refs_reached_limit;
990 // the initial value of _refs_reached_limit (i.e. what it was before
991 // it was decreased).
992 size_t _real_refs_reached_limit;
994 // used by the work stealing stuff
995 int _hash_seed;
996 // if this is true, then the task has aborted for some reason
997 bool _has_aborted;
998 // set when the task aborts because it has met its time quota
999 bool _has_timed_out;
1000 // true when we're draining SATB buffers; this avoids the task
1001 // aborting due to SATB buffers being available (as we're already
1002 // dealing with them)
1003 bool _draining_satb_buffers;
1005 // number sequence of past step times
1006 NumberSeq _step_times_ms;
1007 // elapsed time of this task
1008 double _elapsed_time_ms;
1009 // termination time of this task
1010 double _termination_time_ms;
1011 // when this task got into the termination protocol
1012 double _termination_start_time_ms;
1014 // true when the task is in a concurrent phase, false when it is
1015 // in the remark phase (so, in the latter case, we do not have to
1016 // check all the things that we have to check during the concurrent
1017 // phase, i.e. SATB buffer availability...)
1018 bool _concurrent;
1020 TruncatedSeq _marking_step_diffs_ms;
1022 // LOTS of statistics related to this task
1023 #if _MARKING_STATS_
1024 NumberSeq _all_clock_intervals_ms;
1025 double _interval_start_time_ms;
1027 int _aborted;
1028 int _aborted_overflow;
1029 int _aborted_cm_aborted;
1030 int _aborted_yield;
1031 int _aborted_timed_out;
1032 int _aborted_satb;
1033 int _aborted_termination;
1035 int _steal_attempts;
1036 int _steals;
1038 int _clock_due_to_marking;
1039 int _clock_due_to_scanning;
1041 int _local_pushes;
1042 int _local_pops;
1043 int _local_max_size;
1044 int _objs_scanned;
1046 int _global_pushes;
1047 int _global_pops;
1048 int _global_max_size;
1050 int _global_transfers_to;
1051 int _global_transfers_from;
1053 int _region_stack_pops;
1055 int _regions_claimed;
1056 int _objs_found_on_bitmap;
1058 int _satb_buffers_processed;
1059 #endif // _MARKING_STATS_
1061 // it updates the local fields after this task has claimed
1062 // a new region to scan
1063 void setup_for_region(HeapRegion* hr);
1064 // it brings the limit of the region up to date
1065 void update_region_limit();
1067 // called when either the words scanned or the refs visited limit
1068 // has been reached
1069 void reached_limit();
1070 // recalculates the words scanned and refs visited limits
1071 void recalculate_limits();
1072 // decreases the words scanned and refs visited limits when we reach
1073 // an expensive operation
1074 void decrease_limits();
1075 // it checks whether the words scanned or refs visited reached their
1076 // respective limit and calls reached_limit() if they have
1077 void check_limits() {
1078 if (_words_scanned >= _words_scanned_limit ||
1079 _refs_reached >= _refs_reached_limit) {
1080 reached_limit();
1081 }
1082 }
1083 // this is supposed to be called regularly during a marking step as
1084 // it checks a bunch of conditions that might cause the marking step
1085 // to abort
1086 void regular_clock_call();
1087 bool concurrent() { return _concurrent; }
1089 public:
1090 // It resets the task; it should be called right at the beginning of
1091 // a marking phase.
1092 void reset(CMBitMap* _nextMarkBitMap);
1093 // it clears all the fields that correspond to a claimed region.
1094 void clear_region_fields();
1096 void set_concurrent(bool concurrent) { _concurrent = concurrent; }
1098 // The main method of this class which performs a marking step
1099 // trying not to exceed the given duration. However, it might exit
1100 // prematurely, according to some conditions (i.e. SATB buffers are
1101 // available for processing).
1102 void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
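// Illustrative driver loop (a sketch only; the 10.0 ms time slice and
// the task/cm pointers are assumed, not taken from this file): a caller
// typically invokes do_marking_step() repeatedly with a small time
// budget until the step completes without aborting, or until marking
// as a whole has been aborted:
//   task->record_start_time();
//   do {
//     task->do_marking_step(10.0 /* assumed ms budget */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//   } while (task->has_aborted() && !cm->has_aborted());
//   task->record_end_time();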
1104 // These two calls start and stop the timer
1105 void record_start_time() {
1106 _elapsed_time_ms = os::elapsedTime() * 1000.0;
1107 }
1108 void record_end_time() {
1109 _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
1110 }
1112 // returns the task ID
1113 int task_id() { return _task_id; }
1115 // From TerminatorTerminator. It determines whether this task should
1116 // exit the termination protocol after it's entered it.
1117 virtual bool should_exit_termination();
1119 // Resets the local region fields after a task has finished scanning a
1120 // region; or when they have become stale as a result of the region
1121 // being evacuated.
1122 void giveup_current_region();
1124 HeapWord* finger() { return _finger; }
1126 bool has_aborted() { return _has_aborted; }
1127 void set_has_aborted() { _has_aborted = true; }
1128 void clear_has_aborted() { _has_aborted = false; }
1129 bool has_timed_out() { return _has_timed_out; }
1130 bool claimed() { return _claimed; }
1132 // Support routines for the partially scanned region that may be
1133 // recorded as a result of aborting while draining the CMRegionStack
1134 MemRegion aborted_region() { return _aborted_region; }
1135 void set_aborted_region(MemRegion mr)
1136 { _aborted_region = mr; }
1138 // Clears any recorded partially scanned region
1139 void clear_aborted_region() { set_aborted_region(MemRegion()); }
1141 void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
1143 // It grays the object by marking it and, if necessary, pushing it
1144 // on the local queue
1145 inline void deal_with_reference(oop obj);
1147 // It scans an object and visits its children.
1148 void scan_object(oop obj);
1150 // It pushes an object on the local queue.
1151 inline void push(oop obj);
1153 // These two move entries to/from the global stack.
1154 void move_entries_to_global_stack();
1155 void get_entries_from_global_stack();
1157 // It pops and scans objects from the local queue. If partially is
1158 // true, then it stops when the queue size reaches a given limit. If
1159 // partially is false, then it stops when the queue is empty.
1160 void drain_local_queue(bool partially);
1161 // It moves entries from the global stack to the local queue and
1162 // drains the local queue. If partially is true, then it stops when
1163 // both the global stack and the local queue reach a given size. If
1164 // partially is false, it tries to empty them totally.
1165 void drain_global_stack(bool partially);
1166 // It keeps picking SATB buffers and processing them until no SATB
1167 // buffers are available.
1168 void drain_satb_buffers();
1169 // It keeps popping regions from the region stack and processing
1170 // them until the region stack is empty.
1171 void drain_region_stack(BitMapClosure* closure);
1173 // moves the local finger to a new location
1174 inline void move_finger_to(HeapWord* new_finger) {
1175 assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
1176 _finger = new_finger;
1177 }
1179 // moves the region finger to a new location
1180 inline void move_region_finger_to(HeapWord* new_finger) {
1181 assert(new_finger < _cm->finger(), "invariant");
1182 _region_finger = new_finger;
1183 }
1185 CMTask(int task_num, ConcurrentMark *cm,
1186 CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1188 // it prints statistics associated with this task
1189 void print_stats();
1191 #if _MARKING_STATS_
1192 void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1193 #endif // _MARKING_STATS_
1194 };
1196 // Class that's used to print out per-region liveness
1197 // information. It's currently used at the end of marking and also
1198 // after we sort the old regions at the end of the cleanup operation.
1199 class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
1200 private:
1201 outputStream* _out;
1203 // Accumulators for these values.
1204 size_t _total_used_bytes;
1205 size_t _total_capacity_bytes;
1206 size_t _total_prev_live_bytes;
1207 size_t _total_next_live_bytes;
1209 // These are set up when we come across a "starts humongous" region
1210 // (as this is where most of this information is stored, not in the
1211 // subsequent "continues humongous" regions). After that, for every
1212 // region in a given humongous region series we deduce the right
1213 // values for it by simply subtracting the appropriate amount from
1214 // these fields. All these values should reach 0 after we've visited
1215 // the last region in the series.
1216 size_t _hum_used_bytes;
1217 size_t _hum_capacity_bytes;
1218 size_t _hum_prev_live_bytes;
1219 size_t _hum_next_live_bytes;
1221 static double perc(size_t val, size_t total) {
1222 if (total == 0) {
1223 return 0.0;
1224 } else {
1225 return 100.0 * ((double) val / (double) total);
1226 }
1227 }
1229 static double bytes_to_mb(size_t val) {
1230 return (double) val / (double) M;
1231 }
1233 // See the .cpp file.
1234 size_t get_hum_bytes(size_t* hum_bytes);
1235 void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
1236 size_t* prev_live_bytes, size_t* next_live_bytes);
1238 public:
1239 // The header and footer are printed in the constructor and
1240 // destructor respectively.
1241 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
1242 virtual bool doHeapRegion(HeapRegion* r);
1243 ~G1PrintRegionLivenessInfoClosure();
1244 };
1246 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP