Tue, 05 Mar 2013 15:36:56 -0800
8008079: G1: Add nextObject routine to CMBitMapRO and replace nextWord
Summary: Update the task local finger to the start of the next object when marking aborts, in order to avoid the redundant scanning of all 0's when the marking task restarts, if otherwise updating to the next word. In addition, reuse the routine nextObject() in routine iterate().
Reviewed-by: johnc, ysr
Contributed-by: tamao <tao.mao@oracle.com>
1 /*
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
28 #include "gc_implementation/g1/heapRegionSets.hpp"
29 #include "utilities/taskqueue.hpp"
31 class G1CollectedHeap;
32 class CMTask;
33 typedef GenericTaskQueue<oop, mtGC> CMTaskQueue;
34 typedef GenericTaskQueueSet<CMTaskQueue, mtGC> CMTaskQueueSet;
36 // Closure used by CM during concurrent reference discovery
37 // and reference processing (during remarking) to determine
38 // if a particular object is alive. It is primarily used
39 // to determine if referents of discovered reference objects
40 // are alive. An instance is also embedded into the
41 // reference processor as the _is_alive_non_header field
42 class G1CMIsAliveClosure: public BoolObjectClosure {
43 G1CollectedHeap* _g1;
44 public:
45 G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
47 void do_object(oop obj) {
48 ShouldNotCallThis();
49 }
50 bool do_object_b(oop obj);
51 };
53 // A generic CM bit map. This is essentially a wrapper around the BitMap
54 // class, with one bit per (1<<_shifter) HeapWords.
56 class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
57 protected:
58 HeapWord* _bmStartWord; // base address of range covered by map
59 size_t _bmWordSize; // map size (in #HeapWords covered)
60 const int _shifter; // map to char or bit
61 VirtualSpace _virtual_space; // underlying the bit map
62 BitMap _bm; // the bit map itself
64 public:
65 // constructor
66 CMBitMapRO(int shifter);
68 enum { do_yield = true };
70 // inquiries
71 HeapWord* startWord() const { return _bmStartWord; }
72 size_t sizeInWords() const { return _bmWordSize; }
73 // the following is one past the last word in space
74 HeapWord* endWord() const { return _bmStartWord + _bmWordSize; }
76 // read marks
78 bool isMarked(HeapWord* addr) const {
79 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
80 "outside underlying space?");
81 return _bm.at(heapWordToOffset(addr));
82 }
84 // iteration
85 inline bool iterate(BitMapClosure* cl, MemRegion mr);
86 inline bool iterate(BitMapClosure* cl);
88 // Return the address corresponding to the next marked bit at or after
89 // "addr", and before "limit", if "limit" is non-NULL. If there is no
90 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
91 HeapWord* getNextMarkedWordAddress(HeapWord* addr,
92 HeapWord* limit = NULL) const;
93 // Return the address corresponding to the next unmarked bit at or after
94 // "addr", and before "limit", if "limit" is non-NULL. If there is no
95 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
96 HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
97 HeapWord* limit = NULL) const;
99 // conversion utilities
100 HeapWord* offsetToHeapWord(size_t offset) const {
101 return _bmStartWord + (offset << _shifter);
102 }
103 size_t heapWordToOffset(HeapWord* addr) const {
104 return pointer_delta(addr, _bmStartWord) >> _shifter;
105 }
106 int heapWordDiffToOffsetDiff(size_t diff) const;
108 // The argument addr should be the start address of a valid object
109 HeapWord* nextObject(HeapWord* addr) {
110 oop obj = (oop) addr;
111 HeapWord* res = addr + obj->size();
112 assert(offsetToHeapWord(heapWordToOffset(res)) == res, "sanity");
113 return res;
114 }
116 // debugging
117 NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
118 };
120 class CMBitMap : public CMBitMapRO {
122 public:
123 // constructor
124 CMBitMap(int shifter) :
125 CMBitMapRO(shifter) {}
127 // Allocates the back store for the marking bitmap
128 bool allocate(ReservedSpace heap_rs);
130 // write marks
131 void mark(HeapWord* addr) {
132 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
133 "outside underlying space?");
134 _bm.set_bit(heapWordToOffset(addr));
135 }
136 void clear(HeapWord* addr) {
137 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
138 "outside underlying space?");
139 _bm.clear_bit(heapWordToOffset(addr));
140 }
141 bool parMark(HeapWord* addr) {
142 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
143 "outside underlying space?");
144 return _bm.par_set_bit(heapWordToOffset(addr));
145 }
146 bool parClear(HeapWord* addr) {
147 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
148 "outside underlying space?");
149 return _bm.par_clear_bit(heapWordToOffset(addr));
150 }
151 void markRange(MemRegion mr);
152 void clearAll();
153 void clearRange(MemRegion mr);
155 // Starting at the bit corresponding to "addr" (inclusive), find the next
156 // "1" bit, if any. This bit starts some run of consecutive "1"'s; find
157 // the end of this run (stopping at "end_addr"). Return the MemRegion
158 // covering from the start of the region corresponding to the first bit
159 // of the run to the end of the region corresponding to the last bit of
160 // the run. If there is no "1" bit at or after "addr", return an empty
161 // MemRegion.
162 MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
163 };
165 // Represents a marking stack used by ConcurrentMarking in the G1 collector.
166 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
167 VirtualSpace _virtual_space; // Underlying backing store for actual stack
168 ConcurrentMark* _cm;
169 oop* _base; // bottom of stack
170 jint _index; // one more than last occupied index
171 jint _capacity; // max #elements
172 jint _saved_index; // value of _index saved at start of GC
173 NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run
175 bool _overflow;
176 bool _should_expand;
177 DEBUG_ONLY(bool _drain_in_progress;)
178 DEBUG_ONLY(bool _drain_in_progress_yields;)
180 public:
181 CMMarkStack(ConcurrentMark* cm);
182 ~CMMarkStack();
184 #ifndef PRODUCT
185 jint max_depth() const {
186 return _max_depth;
187 }
188 #endif
190 bool allocate(size_t capacity);
192 oop pop() {
193 if (!isEmpty()) {
194 return _base[--_index] ;
195 }
196 return NULL;
197 }
199 // If overflow happens, don't do the push, and record the overflow.
200 // *Requires* that "ptr" is already marked.
201 void push(oop ptr) {
202 if (isFull()) {
203 // Record overflow.
204 _overflow = true;
205 return;
206 } else {
207 _base[_index++] = ptr;
208 NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
209 }
210 }
211 // Non-block impl. Note: concurrency is allowed only with other
212 // "par_push" operations, not with "pop" or "drain". We would need
213 // parallel versions of them if such concurrency was desired.
214 void par_push(oop ptr);
216 // Pushes the first "n" elements of "ptr_arr" on the stack.
217 // Non-block impl. Note: concurrency is allowed only with other
218 // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
219 void par_adjoin_arr(oop* ptr_arr, int n);
221 // Pushes the first "n" elements of "ptr_arr" on the stack.
222 // Locking impl: concurrency is allowed only with
223 // "par_push_arr" and/or "par_pop_arr" operations, which use the same
224 // locking strategy.
225 void par_push_arr(oop* ptr_arr, int n);
227 // If returns false, the array was empty. Otherwise, removes up to "max"
228 // elements from the stack, and transfers them to "ptr_arr" in an
229 // unspecified order. The actual number transferred is given in "n" ("n
230 // == 0" is deliberately redundant with the return value.) Locking impl:
231 // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
232 // operations, which use the same locking strategy.
233 bool par_pop_arr(oop* ptr_arr, int max, int* n);
235 // Drain the mark stack, applying the given closure to all fields of
236 // objects on the stack. (That is, continue until the stack is empty,
237 // even if closure applications add entries to the stack.) The "bm"
238 // argument, if non-null, may be used to verify that only marked objects
239 // are on the mark stack. If "yield_after" is "true", then the
240 // concurrent marker performing the drain offers to yield after
241 // processing each object. If a yield occurs, stops the drain operation
242 // and returns false. Otherwise, returns true.
243 template<class OopClosureClass>
244 bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
246 bool isEmpty() { return _index == 0; }
247 bool isFull() { return _index == _capacity; }
248 int maxElems() { return _capacity; }
250 bool overflow() { return _overflow; }
251 void clear_overflow() { _overflow = false; }
253 bool should_expand() const { return _should_expand; }
254 void set_should_expand();
256 // Expand the stack, typically in response to an overflow condition
257 void expand();
259 int size() { return _index; }
261 void setEmpty() { _index = 0; clear_overflow(); }
263 // Record the current index.
264 void note_start_of_gc();
266 // Make sure that we have not added any entries to the stack during GC.
267 void note_end_of_gc();
269 // iterate over the oops in the mark stack, up to the bound recorded via
270 // the call above.
271 void oops_do(OopClosure* f);
272 };
274 class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
275 private:
276 #ifndef PRODUCT
277 uintx _num_remaining;
278 bool _force;
279 #endif // !defined(PRODUCT)
281 public:
282 void init() PRODUCT_RETURN;
283 void update() PRODUCT_RETURN;
284 bool should_force() PRODUCT_RETURN_( return false; );
285 };
287 // this will enable a variety of different statistics per GC task
288 #define _MARKING_STATS_ 0
289 // this will enable the higher verbose levels
290 #define _MARKING_VERBOSE_ 0
292 #if _MARKING_STATS_
293 #define statsOnly(statement) \
294 do { \
295 statement ; \
296 } while (0)
297 #else // _MARKING_STATS_
298 #define statsOnly(statement) \
299 do { \
300 } while (0)
301 #endif // _MARKING_STATS_
303 typedef enum {
304 no_verbose = 0, // verbose turned off
305 stats_verbose, // only prints stats at the end of marking
306 low_verbose, // low verbose, mostly per region and per major event
307 medium_verbose, // a bit more detailed than low
308 high_verbose // per object verbose
309 } CMVerboseLevel;
311 class YoungList;
313 // Root Regions are regions that are not empty at the beginning of a
314 // marking cycle and which we might collect during an evacuation pause
315 // while the cycle is active. Given that, during evacuation pauses, we
316 // do not copy objects that are explicitly marked, what we have to do
317 // for the root regions is to scan them and mark all objects reachable
318 // from them. According to the SATB assumptions, we only need to visit
319 // each object once during marking. So, as long as we finish this scan
320 // before the next evacuation pause, we can copy the objects from the
321 // root regions without having to mark them or do anything else to them.
322 //
323 // Currently, we only support root region scanning once (at the start
324 // of the marking cycle) and the root regions are all the survivor
325 // regions populated during the initial-mark pause.
326 class CMRootRegions VALUE_OBJ_CLASS_SPEC {
327 private:
328 YoungList* _young_list;
329 ConcurrentMark* _cm;
331 volatile bool _scan_in_progress;
332 volatile bool _should_abort;
333 HeapRegion* volatile _next_survivor;
335 public:
336 CMRootRegions();
337 // We actually do most of the initialization in this method.
338 void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
340 // Reset the claiming / scanning of the root regions.
341 void prepare_for_scan();
343 // Forces get_next() to return NULL so that the iteration aborts early.
344 void abort() { _should_abort = true; }
346 // Return true if the CM thread are actively scanning root regions,
347 // false otherwise.
348 bool scan_in_progress() { return _scan_in_progress; }
350 // Claim the next root region to scan atomically, or return NULL if
351 // all have been claimed.
352 HeapRegion* claim_next();
354 // Flag that we're done with root region scanning and notify anyone
355 // who's waiting on it. If aborted is false, assume that all regions
356 // have been claimed.
357 void scan_finished();
359 // If CM threads are still scanning root regions, wait until they
360 // are done. Return true if we had to wait, false otherwise.
361 bool wait_until_scan_finished();
362 };
364 class ConcurrentMarkThread;
366 class ConcurrentMark: public CHeapObj<mtGC> {
367 friend class CMMarkStack;
368 friend class ConcurrentMarkThread;
369 friend class CMTask;
370 friend class CMBitMapClosure;
371 friend class CMGlobalObjectClosure;
372 friend class CMRemarkTask;
373 friend class CMConcurrentMarkingTask;
374 friend class G1ParNoteEndTask;
375 friend class CalcLiveObjectsClosure;
376 friend class G1CMRefProcTaskProxy;
377 friend class G1CMRefProcTaskExecutor;
378 friend class G1CMKeepAliveAndDrainClosure;
379 friend class G1CMDrainMarkingStackClosure;
381 protected:
382 ConcurrentMarkThread* _cmThread; // the thread doing the work
383 G1CollectedHeap* _g1h; // the heap.
384 uint _parallel_marking_threads; // the number of marking
385 // threads we're use
386 uint _max_parallel_marking_threads; // max number of marking
387 // threads we'll ever use
388 double _sleep_factor; // how much we have to sleep, with
389 // respect to the work we just did, to
390 // meet the marking overhead goal
391 double _marking_task_overhead; // marking target overhead for
392 // a single task
394 // same as the two above, but for the cleanup task
395 double _cleanup_sleep_factor;
396 double _cleanup_task_overhead;
398 FreeRegionList _cleanup_list;
400 // Concurrent marking support structures
401 CMBitMap _markBitMap1;
402 CMBitMap _markBitMap2;
403 CMBitMapRO* _prevMarkBitMap; // completed mark bitmap
404 CMBitMap* _nextMarkBitMap; // under-construction mark bitmap
406 BitMap _region_bm;
407 BitMap _card_bm;
409 // Heap bounds
410 HeapWord* _heap_start;
411 HeapWord* _heap_end;
413 // Root region tracking and claiming.
414 CMRootRegions _root_regions;
416 // For gray objects
417 CMMarkStack _markStack; // Grey objects behind global finger.
418 HeapWord* volatile _finger; // the global finger, region aligned,
419 // always points to the end of the
420 // last claimed region
422 // marking tasks
423 uint _max_worker_id;// maximum worker id
424 uint _active_tasks; // task num currently active
425 CMTask** _tasks; // task queue array (max_worker_id len)
426 CMTaskQueueSet* _task_queues; // task queue set
427 ParallelTaskTerminator _terminator; // for termination
429 // Two sync barriers that are used to synchronise tasks when an
430 // overflow occurs. The algorithm is the following. All tasks enter
431 // the first one to ensure that they have all stopped manipulating
432 // the global data structures. After they exit it, they re-initialise
433 // their data structures and task 0 re-initialises the global data
434 // structures. Then, they enter the second sync barrier. This
435 // ensure, that no task starts doing work before all data
436 // structures (local and global) have been re-initialised. When they
437 // exit it, they are free to start working again.
438 WorkGangBarrierSync _first_overflow_barrier_sync;
439 WorkGangBarrierSync _second_overflow_barrier_sync;
441 // this is set by any task, when an overflow on the global data
442 // structures is detected.
443 volatile bool _has_overflown;
444 // true: marking is concurrent, false: we're in remark
445 volatile bool _concurrent;
446 // set at the end of a Full GC so that marking aborts
447 volatile bool _has_aborted;
449 // used when remark aborts due to an overflow to indicate that
450 // another concurrent marking phase should start
451 volatile bool _restart_for_overflow;
453 // This is true from the very start of concurrent marking until the
454 // point when all the tasks complete their work. It is really used
455 // to determine the points between the end of concurrent marking and
456 // time of remark.
457 volatile bool _concurrent_marking_in_progress;
459 // verbose level
460 CMVerboseLevel _verbose_level;
462 // All of these times are in ms.
463 NumberSeq _init_times;
464 NumberSeq _remark_times;
465 NumberSeq _remark_mark_times;
466 NumberSeq _remark_weak_ref_times;
467 NumberSeq _cleanup_times;
468 double _total_counting_time;
469 double _total_rs_scrub_time;
471 double* _accum_task_vtime; // accumulated task vtime
473 FlexibleWorkGang* _parallel_workers;
475 ForceOverflowSettings _force_overflow_conc;
476 ForceOverflowSettings _force_overflow_stw;
478 void weakRefsWork(bool clear_all_soft_refs);
480 void swapMarkBitMaps();
482 // It resets the global marking data structures, as well as the
483 // task local ones; should be called during initial mark.
484 void reset();
486 // Resets all the marking data structures. Called when we have to restart
487 // marking or when marking completes (via set_non_marking_state below).
488 void reset_marking_state(bool clear_overflow = true);
490 // We do this after we're done with marking so that the marking data
491 // structures are initialised to a sensible and predictable state.
492 void set_non_marking_state();
494 // It should be called to indicate which phase we're in (concurrent
495 // mark or remark) and how many threads are currently active.
496 void set_phase(uint active_tasks, bool concurrent);
498 // prints all gathered CM-related statistics
499 void print_stats();
501 bool cleanup_list_is_empty() {
502 return _cleanup_list.is_empty();
503 }
505 // accessor methods
506 uint parallel_marking_threads() const { return _parallel_marking_threads; }
507 uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
508 double sleep_factor() { return _sleep_factor; }
509 double marking_task_overhead() { return _marking_task_overhead;}
510 double cleanup_sleep_factor() { return _cleanup_sleep_factor; }
511 double cleanup_task_overhead() { return _cleanup_task_overhead;}
513 bool use_parallel_marking_threads() const {
514 assert(parallel_marking_threads() <=
515 max_parallel_marking_threads(), "sanity");
516 assert((_parallel_workers == NULL && parallel_marking_threads() == 0) ||
517 parallel_marking_threads() > 0,
518 "parallel workers not set up correctly");
519 return _parallel_workers != NULL;
520 }
522 HeapWord* finger() { return _finger; }
523 bool concurrent() { return _concurrent; }
524 uint active_tasks() { return _active_tasks; }
525 ParallelTaskTerminator* terminator() { return &_terminator; }
527 // It claims the next available region to be scanned by a marking
528 // task/thread. It might return NULL if the next region is empty or
529 // we have run out of regions. In the latter case, out_of_regions()
530 // determines whether we've really run out of regions or the task
531 // should call claim_region() again. This might seem a bit
532 // awkward. Originally, the code was written so that claim_region()
533 // either successfully returned with a non-empty region or there
534 // were no more regions to be claimed. The problem with this was
535 // that, in certain circumstances, it iterated over large chunks of
536 // the heap finding only empty regions and, while it was working, it
537 // was preventing the calling task to call its regular clock
538 // method. So, this way, each task will spend very little time in
539 // claim_region() and is allowed to call the regular clock method
540 // frequently.
541 HeapRegion* claim_region(uint worker_id);
543 // It determines whether we've run out of regions to scan.
544 bool out_of_regions() { return _finger == _heap_end; }
546 // Returns the task with the given id
547 CMTask* task(int id) {
548 assert(0 <= id && id < (int) _active_tasks,
549 "task id not within active bounds");
550 return _tasks[id];
551 }
553 // Returns the task queue with the given id
554 CMTaskQueue* task_queue(int id) {
555 assert(0 <= id && id < (int) _active_tasks,
556 "task queue id not within active bounds");
557 return (CMTaskQueue*) _task_queues->queue(id);
558 }
560 // Returns the task queue set
561 CMTaskQueueSet* task_queues() { return _task_queues; }
563 // Access / manipulation of the overflow flag which is set to
564 // indicate that the global stack has overflown
565 bool has_overflown() { return _has_overflown; }
566 void set_has_overflown() { _has_overflown = true; }
567 void clear_has_overflown() { _has_overflown = false; }
568 bool restart_for_overflow() { return _restart_for_overflow; }
570 bool has_aborted() { return _has_aborted; }
572 // Methods to enter the two overflow sync barriers
573 void enter_first_sync_barrier(uint worker_id);
574 void enter_second_sync_barrier(uint worker_id);
576 ForceOverflowSettings* force_overflow_conc() {
577 return &_force_overflow_conc;
578 }
580 ForceOverflowSettings* force_overflow_stw() {
581 return &_force_overflow_stw;
582 }
584 ForceOverflowSettings* force_overflow() {
585 if (concurrent()) {
586 return force_overflow_conc();
587 } else {
588 return force_overflow_stw();
589 }
590 }
592 // Live Data Counting data structures...
593 // These data structures are initialized at the start of
594 // marking. They are written to while marking is active.
595 // They are aggregated during remark; the aggregated values
596 // are then used to populate the _region_bm, _card_bm, and
597 // the total live bytes, which are then subsequently updated
598 // during cleanup.
600 // An array of bitmaps (one bit map per task). Each bitmap
601 // is used to record the cards spanned by the live objects
602 // marked by that task/worker.
603 BitMap* _count_card_bitmaps;
605 // Used to record the number of marked live bytes
606 // (for each region, by worker thread).
607 size_t** _count_marked_bytes;
609 // Card index of the bottom of the G1 heap. Used for biasing indices into
610 // the card bitmaps.
611 intptr_t _heap_bottom_card_num;
613 // Set to true when initialization is complete
614 bool _completed_initialization;
616 public:
617 // Manipulation of the global mark stack.
618 // Notice that the first mark_stack_push is CAS-based, whereas the
619 // two below are Mutex-based. This is OK since the first one is only
620 // called during evacuation pauses and doesn't compete with the
621 // other two (which are called by the marking tasks during
622 // concurrent marking or remark).
623 bool mark_stack_push(oop p) {
624 _markStack.par_push(p);
625 if (_markStack.overflow()) {
626 set_has_overflown();
627 return false;
628 }
629 return true;
630 }
631 bool mark_stack_push(oop* arr, int n) {
632 _markStack.par_push_arr(arr, n);
633 if (_markStack.overflow()) {
634 set_has_overflown();
635 return false;
636 }
637 return true;
638 }
639 void mark_stack_pop(oop* arr, int max, int* n) {
640 _markStack.par_pop_arr(arr, max, n);
641 }
642 size_t mark_stack_size() { return _markStack.size(); }
643 size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
644 bool mark_stack_overflow() { return _markStack.overflow(); }
645 bool mark_stack_empty() { return _markStack.isEmpty(); }
647 CMRootRegions* root_regions() { return &_root_regions; }
649 bool concurrent_marking_in_progress() {
650 return _concurrent_marking_in_progress;
651 }
652 void set_concurrent_marking_in_progress() {
653 _concurrent_marking_in_progress = true;
654 }
655 void clear_concurrent_marking_in_progress() {
656 _concurrent_marking_in_progress = false;
657 }
659 void update_accum_task_vtime(int i, double vtime) {
660 _accum_task_vtime[i] += vtime;
661 }
663 double all_task_accum_vtime() {
664 double ret = 0.0;
665 for (uint i = 0; i < _max_worker_id; ++i)
666 ret += _accum_task_vtime[i];
667 return ret;
668 }
670 // Attempts to steal an object from the task queues of other tasks
671 bool try_stealing(uint worker_id, int* hash_seed, oop& obj) {
672 return _task_queues->steal(worker_id, hash_seed, obj);
673 }
675 ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
676 ~ConcurrentMark();
678 ConcurrentMarkThread* cmThread() { return _cmThread; }
680 CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
681 CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; }
683 // Returns the number of GC threads to be used in a concurrent
684 // phase based on the number of GC threads being used in a STW
685 // phase.
686 uint scale_parallel_threads(uint n_par_threads);
688 // Calculates the number of GC threads to be used in a concurrent phase.
689 uint calc_parallel_marking_threads();
691 // The following three are interaction between CM and
692 // G1CollectedHeap
694 // This notifies CM that a root during initial-mark needs to be
695 // grayed. It is MT-safe. word_size is the size of the object in
696 // words. It is passed explicitly as sometimes we cannot calculate
697 // it from the given object because it might be in an inconsistent
698 // state (e.g., in to-space and being copied). So the caller is
699 // responsible for dealing with this issue (e.g., get the size from
700 // the from-space image when the to-space image might be
701 // inconsistent) and always passing the size. hr is the region that
702 // contains the object and it's passed optionally from callers who
703 // might already have it (no point in recalculating it).
704 inline void grayRoot(oop obj, size_t word_size,
705 uint worker_id, HeapRegion* hr = NULL);
707 // It iterates over the heap and for each object it comes across it
708 // will dump the contents of its reference fields, as well as
709 // liveness information for the object and its referents. The dump
710 // will be written to a file with the following name:
711 // G1PrintReachableBaseFile + "." + str.
712 // vo decides whether the prev (vo == UsePrevMarking), the next
713 // (vo == UseNextMarking) marking information, or the mark word
714 // (vo == UseMarkWord) will be used to determine the liveness of
715 // each object / referent.
716 // If all is true, all objects in the heap will be dumped, otherwise
717 // only the live ones. In the dump the following symbols / breviations
718 // are used:
719 // M : an explicitly live object (its bitmap bit is set)
720 // > : an implicitly live object (over tams)
721 // O : an object outside the G1 heap (typically: in the perm gen)
722 // NOT : a reference field whose referent is not live
723 // AND MARKED : indicates that an object is both explicitly and
724 // implicitly live (it should be one or the other, not both)
725 void print_reachable(const char* str,
726 VerifyOption vo, bool all) PRODUCT_RETURN;
728 // Clear the next marking bitmap (will be called concurrently).
729 void clearNextBitmap();
731 // These two do the work that needs to be done before and after the
732 // initial root checkpoint. Since this checkpoint can be done at two
733 // different points (i.e. an explicit pause or piggy-backed on a
734 // young collection), then it's nice to be able to easily share the
735 // pre/post code. It might be the case that we can put everything in
736 // the post method. TP
737 void checkpointRootsInitialPre();
738 void checkpointRootsInitialPost();
740 // Scan all the root regions and mark everything reachable from
741 // them.
742 void scanRootRegions();
744 // Scan a single root region and mark everything reachable from it.
745 void scanRootRegion(HeapRegion* hr, uint worker_id);
747 // Do concurrent phase of marking, to a tentative transitive closure.
748 void markFromRoots();
750 void checkpointRootsFinal(bool clear_all_soft_refs);
751 void checkpointRootsFinalWork();
752 void cleanup();
753 void completeCleanup();
755 // Mark in the previous bitmap. NB: this is usually read-only, so use
756 // this carefully!
757 inline void markPrev(oop p);
759 // Clears marks for all objects in the given range, for the prev,
760 // next, or both bitmaps. NB: the previous bitmap is usually
761 // read-only, so use this carefully!
762 void clearRangePrevBitmap(MemRegion mr);
763 void clearRangeNextBitmap(MemRegion mr);
764 void clearRangeBothBitmaps(MemRegion mr);
766 // Notify data structures that a GC has started.
767 void note_start_of_gc() {
768 _markStack.note_start_of_gc();
769 }
771 // Notify data structures that a GC is finished.
772 void note_end_of_gc() {
773 _markStack.note_end_of_gc();
774 }
776 // Verify that there are no CSet oops on the stacks (taskqueues /
777 // global mark stack), enqueued SATB buffers, per-thread SATB
778 // buffers, and fingers (global / per-task). The boolean parameters
779 // decide which of the above data structures to verify. If marking
780 // is not in progress, it's a no-op.
781 void verify_no_cset_oops(bool verify_stacks,
782 bool verify_enqueued_buffers,
783 bool verify_thread_buffers,
784 bool verify_fingers) PRODUCT_RETURN;
786 // It is called at the end of an evacuation pause during marking so
787 // that CM is notified of where the new end of the heap is. It
788 // doesn't do anything if concurrent_marking_in_progress() is false,
789 // unless the force parameter is true.
790 void update_g1_committed(bool force = false);
792 bool isMarked(oop p) const {
793 assert(p != NULL && p->is_oop(), "expected an oop");
794 HeapWord* addr = (HeapWord*)p;
795 assert(addr >= _nextMarkBitMap->startWord() ||
796 addr < _nextMarkBitMap->endWord(), "in a region");
798 return _nextMarkBitMap->isMarked(addr);
799 }
801 inline bool not_yet_marked(oop p) const;
803 // XXX Debug code
804 bool containing_card_is_marked(void* p);
805 bool containing_cards_are_marked(void* start, void* last);
807 bool isPrevMarked(oop p) const {
808 assert(p != NULL && p->is_oop(), "expected an oop");
809 HeapWord* addr = (HeapWord*)p;
810 assert(addr >= _prevMarkBitMap->startWord() ||
811 addr < _prevMarkBitMap->endWord(), "in a region");
813 return _prevMarkBitMap->isMarked(addr);
814 }
816 inline bool do_yield_check(uint worker_i = 0);
817 inline bool should_yield();
819 // Called to abort the marking cycle after a Full GC takes palce.
820 void abort();
822 // This prints the global/local fingers. It is used for debugging.
823 NOT_PRODUCT(void print_finger();)
825 void print_summary_info();
827 void print_worker_threads_on(outputStream* st) const;
829 // The following indicate whether a given verbose level has been
830 // set. Notice that anything above stats is conditional to
831 // _MARKING_VERBOSE_ having been set to 1
832 bool verbose_stats() {
833 return _verbose_level >= stats_verbose;
834 }
835 bool verbose_low() {
836 return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
837 }
838 bool verbose_medium() {
839 return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
840 }
841 bool verbose_high() {
842 return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
843 }
845 // Liveness counting
847 // Utility routine to set an exclusive range of cards on the given
848 // card liveness bitmap
849 inline void set_card_bitmap_range(BitMap* card_bm,
850 BitMap::idx_t start_idx,
851 BitMap::idx_t end_idx,
852 bool is_par);
854 // Returns the card number of the bottom of the G1 heap.
855 // Used in biasing indices into accounting card bitmaps.
856 intptr_t heap_bottom_card_num() const {
857 return _heap_bottom_card_num;
858 }
860 // Returns the card bitmap for a given task or worker id.
861 BitMap* count_card_bitmap_for(uint worker_id) {
862 assert(0 <= worker_id && worker_id < _max_worker_id, "oob");
863 assert(_count_card_bitmaps != NULL, "uninitialized");
864 BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
865 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
866 return task_card_bm;
867 }
869 // Returns the array containing the marked bytes for each region,
870 // for the given worker or task id.
871 size_t* count_marked_bytes_array_for(uint worker_id) {
872 assert(0 <= worker_id && worker_id < _max_worker_id, "oob");
873 assert(_count_marked_bytes != NULL, "uninitialized");
874 size_t* marked_bytes_array = _count_marked_bytes[worker_id];
875 assert(marked_bytes_array != NULL, "uninitialized");
876 return marked_bytes_array;
877 }
879 // Returns the index in the liveness accounting card table bitmap
880 // for the given address
881 inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
883 // Counts the size of the given memory region in the the given
884 // marked_bytes array slot for the given HeapRegion.
885 // Sets the bits in the given card bitmap that are associated with the
886 // cards that are spanned by the memory region.
887 inline void count_region(MemRegion mr, HeapRegion* hr,
888 size_t* marked_bytes_array,
889 BitMap* task_card_bm);
891 // Counts the given memory region in the task/worker counting
892 // data structures for the given worker id.
893 inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
895 // Counts the given memory region in the task/worker counting
896 // data structures for the given worker id.
897 inline void count_region(MemRegion mr, uint worker_id);
899 // Counts the given object in the given task/worker counting
900 // data structures.
901 inline void count_object(oop obj, HeapRegion* hr,
902 size_t* marked_bytes_array,
903 BitMap* task_card_bm);
905 // Counts the given object in the task/worker counting data
906 // structures for the given worker id.
907 inline void count_object(oop obj, HeapRegion* hr, uint worker_id);
909 // Attempts to mark the given object and, if successful, counts
910 // the object in the given task/worker counting structures.
911 inline bool par_mark_and_count(oop obj, HeapRegion* hr,
912 size_t* marked_bytes_array,
913 BitMap* task_card_bm);
915 // Attempts to mark the given object and, if successful, counts
916 // the object in the task/worker counting structures for the
917 // given worker id.
918 inline bool par_mark_and_count(oop obj, size_t word_size,
919 HeapRegion* hr, uint worker_id);
921 // Attempts to mark the given object and, if successful, counts
922 // the object in the task/worker counting structures for the
923 // given worker id.
924 inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);
926 // Similar to the above routine but we don't know the heap region that
927 // contains the object to be marked/counted, which this routine looks up.
928 inline bool par_mark_and_count(oop obj, uint worker_id);
930 // Similar to the above routine but there are times when we cannot
931 // safely calculate the size of obj due to races and we, therefore,
932 // pass the size in as a parameter. It is the caller's reponsibility
933 // to ensure that the size passed in for obj is valid.
934 inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);
936 // Unconditionally mark the given object, and unconditinally count
937 // the object in the counting structures for worker id 0.
938 // Should *not* be called from parallel code.
939 inline bool mark_and_count(oop obj, HeapRegion* hr);
941 // Similar to the above routine but we don't know the heap region that
942 // contains the object to be marked/counted, which this routine looks up.
943 // Should *not* be called from parallel code.
944 inline bool mark_and_count(oop obj);
946 // Returns true if initialization was successfully completed.
947 bool completed_initialization() const {
948 return _completed_initialization;
949 }
951 protected:
952 // Clear all the per-task bitmaps and arrays used to store the
953 // counting data.
954 void clear_all_count_data();
956 // Aggregates the counting data for each worker/task
957 // that was constructed while marking. Also sets
958 // the amount of marked bytes for each region and
959 // the top at concurrent mark count.
960 void aggregate_count_data();
962 // Verification routine
963 void verify_count_data();
964 };
966 // A class representing a marking task.
967 class CMTask : public TerminatorTerminator {
968 private:
969 enum PrivateConstants {
970 // the regular clock call is called once the scanned words reaches
971 // this limit
972 words_scanned_period = 12*1024,
973 // the regular clock call is called once the number of visited
974 // references reaches this limit
975 refs_reached_period = 384,
976 // initial value for the hash seed, used in the work stealing code
977 init_hash_seed = 17,
978 // how many entries will be transferred between global stack and
979 // local queues
980 global_stack_transfer_size = 16
981 };
983 uint _worker_id;
984 G1CollectedHeap* _g1h;
985 ConcurrentMark* _cm;
986 CMBitMap* _nextMarkBitMap;
987 // the task queue of this task
988 CMTaskQueue* _task_queue;
989 private:
990 // the task queue set---needed for stealing
991 CMTaskQueueSet* _task_queues;
992 // indicates whether the task has been claimed---this is only for
993 // debugging purposes
994 bool _claimed;
996 // number of calls to this task
997 int _calls;
999 // when the virtual timer reaches this time, the marking step should
1000 // exit
1001 double _time_target_ms;
1002 // the start time of the current marking step
1003 double _start_time_ms;
1005 // the oop closure used for iterations over oops
1006 G1CMOopClosure* _cm_oop_closure;
1008 // the region this task is scanning, NULL if we're not scanning any
1009 HeapRegion* _curr_region;
1010 // the local finger of this task, NULL if we're not scanning a region
1011 HeapWord* _finger;
1012 // limit of the region this task is scanning, NULL if we're not scanning one
1013 HeapWord* _region_limit;
1015 // the number of words this task has scanned
1016 size_t _words_scanned;
1017 // When _words_scanned reaches this limit, the regular clock is
1018 // called. Notice that this might be decreased under certain
1019 // circumstances (i.e. when we believe that we did an expensive
1020 // operation).
1021 size_t _words_scanned_limit;
1022 // the initial value of _words_scanned_limit (i.e. what it was
1023 // before it was decreased).
1024 size_t _real_words_scanned_limit;
1026 // the number of references this task has visited
1027 size_t _refs_reached;
1028 // When _refs_reached reaches this limit, the regular clock is
1029 // called. Notice this this might be decreased under certain
1030 // circumstances (i.e. when we believe that we did an expensive
1031 // operation).
1032 size_t _refs_reached_limit;
1033 // the initial value of _refs_reached_limit (i.e. what it was before
1034 // it was decreased).
1035 size_t _real_refs_reached_limit;
1037 // used by the work stealing stuff
1038 int _hash_seed;
1039 // if this is true, then the task has aborted for some reason
1040 bool _has_aborted;
1041 // set when the task aborts because it has met its time quota
1042 bool _has_timed_out;
1043 // true when we're draining SATB buffers; this avoids the task
1044 // aborting due to SATB buffers being available (as we're already
1045 // dealing with them)
1046 bool _draining_satb_buffers;
1048 // number sequence of past step times
1049 NumberSeq _step_times_ms;
1050 // elapsed time of this task
1051 double _elapsed_time_ms;
1052 // termination time of this task
1053 double _termination_time_ms;
1054 // when this task got into the termination protocol
1055 double _termination_start_time_ms;
1057 // true when the task is during a concurrent phase, false when it is
1058 // in the remark phase (so, in the latter case, we do not have to
1059 // check all the things that we have to check during the concurrent
1060 // phase, i.e. SATB buffer availability...)
1061 bool _concurrent;
1063 TruncatedSeq _marking_step_diffs_ms;
1065 // Counting data structures. Embedding the task's marked_bytes_array
1066 // and card bitmap into the actual task saves having to go through
1067 // the ConcurrentMark object.
1068 size_t* _marked_bytes_array;
1069 BitMap* _card_bm;
1071 // LOTS of statistics related with this task
1072 #if _MARKING_STATS_
1073 NumberSeq _all_clock_intervals_ms;
1074 double _interval_start_time_ms;
1076 int _aborted;
1077 int _aborted_overflow;
1078 int _aborted_cm_aborted;
1079 int _aborted_yield;
1080 int _aborted_timed_out;
1081 int _aborted_satb;
1082 int _aborted_termination;
1084 int _steal_attempts;
1085 int _steals;
1087 int _clock_due_to_marking;
1088 int _clock_due_to_scanning;
1090 int _local_pushes;
1091 int _local_pops;
1092 int _local_max_size;
1093 int _objs_scanned;
1095 int _global_pushes;
1096 int _global_pops;
1097 int _global_max_size;
1099 int _global_transfers_to;
1100 int _global_transfers_from;
1102 int _regions_claimed;
1103 int _objs_found_on_bitmap;
1105 int _satb_buffers_processed;
1106 #endif // _MARKING_STATS_
1108 // it updates the local fields after this task has claimed
1109 // a new region to scan
1110 void setup_for_region(HeapRegion* hr);
1111 // it brings up-to-date the limit of the region
1112 void update_region_limit();
1114 // called when either the words scanned or the refs visited limit
1115 // has been reached
1116 void reached_limit();
1117 // recalculates the words scanned and refs visited limits
1118 void recalculate_limits();
1119 // decreases the words scanned and refs visited limits when we reach
1120 // an expensive operation
1121 void decrease_limits();
1122 // it checks whether the words scanned or refs visited reached their
1123 // respective limit and calls reached_limit() if they have
1124 void check_limits() {
1125 if (_words_scanned >= _words_scanned_limit ||
1126 _refs_reached >= _refs_reached_limit) {
1127 reached_limit();
1128 }
1129 }
1130 // this is supposed to be called regularly during a marking step as
1131 // it checks a bunch of conditions that might cause the marking step
1132 // to abort
1133 void regular_clock_call();
1134 bool concurrent() { return _concurrent; }
1136 public:
1137 // It resets the task; it should be called right at the beginning of
1138 // a marking phase.
1139 void reset(CMBitMap* _nextMarkBitMap);
1140 // it clears all the fields that correspond to a claimed region.
1141 void clear_region_fields();
1143 void set_concurrent(bool concurrent) { _concurrent = concurrent; }
1145 // The main method of this class which performs a marking step
1146 // trying not to exceed the given duration. However, it might exit
1147 // prematurely, according to some conditions (i.e. SATB buffers are
1148 // available for processing).
1149 void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
1151 // These two calls start and stop the timer
1152 void record_start_time() {
1153 _elapsed_time_ms = os::elapsedTime() * 1000.0;
1154 }
1155 void record_end_time() {
1156 _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
1157 }
1159 // returns the worker ID associated with this task.
1160 uint worker_id() { return _worker_id; }
1162 // From TerminatorTerminator. It determines whether this task should
1163 // exit the termination protocol after it's entered it.
1164 virtual bool should_exit_termination();
1166 // Resets the local region fields after a task has finished scanning a
1167 // region; or when they have become stale as a result of the region
1168 // being evacuated.
1169 void giveup_current_region();
1171 HeapWord* finger() { return _finger; }
1173 bool has_aborted() { return _has_aborted; }
1174 void set_has_aborted() { _has_aborted = true; }
1175 void clear_has_aborted() { _has_aborted = false; }
1176 bool has_timed_out() { return _has_timed_out; }
1177 bool claimed() { return _claimed; }
1179 void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
1181 // It grays the object by marking it and, if necessary, pushing it
1182 // on the local queue
1183 inline void deal_with_reference(oop obj);
1185 // It scans an object and visits its children.
1186 void scan_object(oop obj);
1188 // It pushes an object on the local queue.
1189 inline void push(oop obj);
1191 // These two move entries to/from the global stack.
1192 void move_entries_to_global_stack();
1193 void get_entries_from_global_stack();
1195 // It pops and scans objects from the local queue. If partially is
1196 // true, then it stops when the queue size is of a given limit. If
1197 // partially is false, then it stops when the queue is empty.
1198 void drain_local_queue(bool partially);
1199 // It moves entries from the global stack to the local queue and
1200 // drains the local queue. If partially is true, then it stops when
1201 // both the global stack and the local queue reach a given size. If
1202 // partially if false, it tries to empty them totally.
1203 void drain_global_stack(bool partially);
1204 // It keeps picking SATB buffers and processing them until no SATB
1205 // buffers are available.
1206 void drain_satb_buffers();
1208 // moves the local finger to a new location
1209 inline void move_finger_to(HeapWord* new_finger) {
1210 assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
1211 _finger = new_finger;
1212 }
1214 CMTask(uint worker_id, ConcurrentMark *cm,
1215 size_t* marked_bytes, BitMap* card_bm,
1216 CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1218 // it prints statistics associated with this task
1219 void print_stats();
1221 #if _MARKING_STATS_
1222 void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1223 #endif // _MARKING_STATS_
1224 };
1226 // Class that's used to to print out per-region liveness
1227 // information. It's currently used at the end of marking and also
1228 // after we sort the old regions at the end of the cleanup operation.
1229 class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
1230 private:
1231 outputStream* _out;
1233 // Accumulators for these values.
1234 size_t _total_used_bytes;
1235 size_t _total_capacity_bytes;
1236 size_t _total_prev_live_bytes;
1237 size_t _total_next_live_bytes;
1239 // These are set up when we come across a "stars humongous" region
1240 // (as this is where most of this information is stored, not in the
1241 // subsequent "continues humongous" regions). After that, for every
1242 // region in a given humongous region series we deduce the right
1243 // values for it by simply subtracting the appropriate amount from
1244 // these fields. All these values should reach 0 after we've visited
1245 // the last region in the series.
1246 size_t _hum_used_bytes;
1247 size_t _hum_capacity_bytes;
1248 size_t _hum_prev_live_bytes;
1249 size_t _hum_next_live_bytes;
1251 static double perc(size_t val, size_t total) {
1252 if (total == 0) {
1253 return 0.0;
1254 } else {
1255 return 100.0 * ((double) val / (double) total);
1256 }
1257 }
1259 static double bytes_to_mb(size_t val) {
1260 return (double) val / (double) M;
1261 }
1263 // See the .cpp file.
1264 size_t get_hum_bytes(size_t* hum_bytes);
1265 void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
1266 size_t* prev_live_bytes, size_t* next_live_bytes);
1268 public:
1269 // The header and footer are printed in the constructor and
1270 // destructor respectively.
1271 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
1272 virtual bool doHeapRegion(HeapRegion* r);
1273 ~G1PrintRegionLivenessInfoClosure();
1274 };
1276 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP