Tue, 21 Aug 2012 14:10:39 -0700
7185699: G1: Prediction model discrepancies
Summary: Correct the result value of G1CollectedHeap::pending_card_num(). Change the code that calculates the GC efficiency of a non-young heap region to use historical data from mixed GCs and the actual number of live bytes when predicting how long it would take to collect the region. Changes were also reviewed by Thomas Schatzl.
Reviewed-by: azeemj, brutisso
1 /*
2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
28 #include "gc_implementation/g1/heapRegionSets.hpp"
29 #include "utilities/taskqueue.hpp"
31 class G1CollectedHeap;
32 class CMTask;
33 typedef GenericTaskQueue<oop, mtGC> CMTaskQueue;
34 typedef GenericTaskQueueSet<CMTaskQueue, mtGC> CMTaskQueueSet;
36 // Closure used by CM during concurrent reference discovery
37 // and reference processing (during remarking) to determine
38 // if a particular object is alive. It is primarily used
39 // to determine if referents of discovered reference objects
40 // are alive. An instance is also embedded into the
41 // reference processor as the _is_alive_non_header field
42 class G1CMIsAliveClosure: public BoolObjectClosure {
43 G1CollectedHeap* _g1;
44 public:
45 G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
47 void do_object(oop obj) {
48 ShouldNotCallThis();
49 }
50 bool do_object_b(oop obj);
51 };
53 // A generic CM bit map. This is essentially a wrapper around the BitMap
54 // class, with one bit per (1<<_shifter) HeapWords.
56 class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
57 protected:
58 HeapWord* _bmStartWord; // base address of range covered by map
59 size_t _bmWordSize; // map size (in #HeapWords covered)
60 const int _shifter; // map to char or bit
61 VirtualSpace _virtual_space; // underlying the bit map
62 BitMap _bm; // the bit map itself
64 public:
65 // constructor
66 CMBitMapRO(ReservedSpace rs, int shifter);
68 enum { do_yield = true };
70 // inquiries
71 HeapWord* startWord() const { return _bmStartWord; }
72 size_t sizeInWords() const { return _bmWordSize; }
73 // the following is one past the last word in space
74 HeapWord* endWord() const { return _bmStartWord + _bmWordSize; }
76 // read marks
78 bool isMarked(HeapWord* addr) const {
79 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
80 "outside underlying space?");
81 return _bm.at(heapWordToOffset(addr));
82 }
84 // iteration
85 inline bool iterate(BitMapClosure* cl, MemRegion mr);
86 inline bool iterate(BitMapClosure* cl);
88 // Return the address corresponding to the next marked bit at or after
89 // "addr", and before "limit", if "limit" is non-NULL. If there is no
90 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
91 HeapWord* getNextMarkedWordAddress(HeapWord* addr,
92 HeapWord* limit = NULL) const;
93 // Return the address corresponding to the next unmarked bit at or after
94 // "addr", and before "limit", if "limit" is non-NULL. If there is no
95 // such bit, returns "limit" if that is non-NULL, or else "endWord()".
96 HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
97 HeapWord* limit = NULL) const;
99 // conversion utilities
100 // XXX Fix these so that offsets are size_t's...
101 HeapWord* offsetToHeapWord(size_t offset) const {
102 return _bmStartWord + (offset << _shifter);
103 }
104 size_t heapWordToOffset(HeapWord* addr) const {
105 return pointer_delta(addr, _bmStartWord) >> _shifter;
106 }
107 int heapWordDiffToOffsetDiff(size_t diff) const;
108 HeapWord* nextWord(HeapWord* addr) {
109 return offsetToHeapWord(heapWordToOffset(addr) + 1);
110 }
112 // debugging
113 NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
114 };
116 class CMBitMap : public CMBitMapRO {
118 public:
119 // constructor
120 CMBitMap(ReservedSpace rs, int shifter) :
121 CMBitMapRO(rs, shifter) {}
123 // write marks
124 void mark(HeapWord* addr) {
125 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
126 "outside underlying space?");
127 _bm.set_bit(heapWordToOffset(addr));
128 }
129 void clear(HeapWord* addr) {
130 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
131 "outside underlying space?");
132 _bm.clear_bit(heapWordToOffset(addr));
133 }
134 bool parMark(HeapWord* addr) {
135 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
136 "outside underlying space?");
137 return _bm.par_set_bit(heapWordToOffset(addr));
138 }
139 bool parClear(HeapWord* addr) {
140 assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
141 "outside underlying space?");
142 return _bm.par_clear_bit(heapWordToOffset(addr));
143 }
144 void markRange(MemRegion mr);
145 void clearAll();
146 void clearRange(MemRegion mr);
148 // Starting at the bit corresponding to "addr" (inclusive), find the next
149 // "1" bit, if any. This bit starts some run of consecutive "1"'s; find
150 // the end of this run (stopping at "end_addr"). Return the MemRegion
151 // covering from the start of the region corresponding to the first bit
152 // of the run to the end of the region corresponding to the last bit of
153 // the run. If there is no "1" bit at or after "addr", return an empty
154 // MemRegion.
155 MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
156 };
158 // Represents a marking stack used by the CM collector.
159 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
160 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
161 ConcurrentMark* _cm;
162 oop* _base; // bottom of stack
163 jint _index; // one more than last occupied index
164 jint _capacity; // max #elements
165 jint _saved_index; // value of _index saved at start of GC
166 NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run
168 bool _overflow;
169 DEBUG_ONLY(bool _drain_in_progress;)
170 DEBUG_ONLY(bool _drain_in_progress_yields;)
172 public:
173 CMMarkStack(ConcurrentMark* cm);
174 ~CMMarkStack();
176 void allocate(size_t size);
178 oop pop() {
179 if (!isEmpty()) {
180 return _base[--_index] ;
181 }
182 return NULL;
183 }
185 // If overflow happens, don't do the push, and record the overflow.
186 // *Requires* that "ptr" is already marked.
187 void push(oop ptr) {
188 if (isFull()) {
189 // Record overflow.
190 _overflow = true;
191 return;
192 } else {
193 _base[_index++] = ptr;
194 NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
195 }
196 }
197 // Non-block impl. Note: concurrency is allowed only with other
198 // "par_push" operations, not with "pop" or "drain". We would need
199 // parallel versions of them if such concurrency was desired.
200 void par_push(oop ptr);
202 // Pushes the first "n" elements of "ptr_arr" on the stack.
203 // Non-block impl. Note: concurrency is allowed only with other
204 // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
205 void par_adjoin_arr(oop* ptr_arr, int n);
207 // Pushes the first "n" elements of "ptr_arr" on the stack.
208 // Locking impl: concurrency is allowed only with
209 // "par_push_arr" and/or "par_pop_arr" operations, which use the same
210 // locking strategy.
211 void par_push_arr(oop* ptr_arr, int n);
213 // If returns false, the array was empty. Otherwise, removes up to "max"
214 // elements from the stack, and transfers them to "ptr_arr" in an
215 // unspecified order. The actual number transferred is given in "n" ("n
216 // == 0" is deliberately redundant with the return value.) Locking impl:
217 // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
218 // operations, which use the same locking strategy.
219 bool par_pop_arr(oop* ptr_arr, int max, int* n);
221 // Drain the mark stack, applying the given closure to all fields of
222 // objects on the stack. (That is, continue until the stack is empty,
223 // even if closure applications add entries to the stack.) The "bm"
224 // argument, if non-null, may be used to verify that only marked objects
225 // are on the mark stack. If "yield_after" is "true", then the
226 // concurrent marker performing the drain offers to yield after
227 // processing each object. If a yield occurs, stops the drain operation
228 // and returns false. Otherwise, returns true.
229 template<class OopClosureClass>
230 bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
232 bool isEmpty() { return _index == 0; }
233 bool isFull() { return _index == _capacity; }
234 int maxElems() { return _capacity; }
236 bool overflow() { return _overflow; }
237 void clear_overflow() { _overflow = false; }
239 int size() { return _index; }
241 void setEmpty() { _index = 0; clear_overflow(); }
243 // Record the current index.
244 void note_start_of_gc();
246 // Make sure that we have not added any entries to the stack during GC.
247 void note_end_of_gc();
249 // iterate over the oops in the mark stack, up to the bound recorded via
250 // the call above.
251 void oops_do(OopClosure* f);
252 };
254 class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
255 private:
256 #ifndef PRODUCT
257 uintx _num_remaining;
258 bool _force;
259 #endif // !defined(PRODUCT)
261 public:
262 void init() PRODUCT_RETURN;
263 void update() PRODUCT_RETURN;
264 bool should_force() PRODUCT_RETURN_( return false; );
265 };
267 // this will enable a variety of different statistics per GC task
268 #define _MARKING_STATS_ 0
269 // this will enable the higher verbose levels
270 #define _MARKING_VERBOSE_ 0
272 #if _MARKING_STATS_
273 #define statsOnly(statement) \
274 do { \
275 statement ; \
276 } while (0)
277 #else // _MARKING_STATS_
278 #define statsOnly(statement) \
279 do { \
280 } while (0)
281 #endif // _MARKING_STATS_
283 typedef enum {
284 no_verbose = 0, // verbose turned off
285 stats_verbose, // only prints stats at the end of marking
286 low_verbose, // low verbose, mostly per region and per major event
287 medium_verbose, // a bit more detailed than low
288 high_verbose // per object verbose
289 } CMVerboseLevel;
291 class YoungList;
293 // Root Regions are regions that are not empty at the beginning of a
294 // marking cycle and which we might collect during an evacuation pause
295 // while the cycle is active. Given that, during evacuation pauses, we
296 // do not copy objects that are explicitly marked, what we have to do
297 // for the root regions is to scan them and mark all objects reachable
298 // from them. According to the SATB assumptions, we only need to visit
299 // each object once during marking. So, as long as we finish this scan
300 // before the next evacuation pause, we can copy the objects from the
301 // root regions without having to mark them or do anything else to them.
302 //
303 // Currently, we only support root region scanning once (at the start
304 // of the marking cycle) and the root regions are all the survivor
305 // regions populated during the initial-mark pause.
306 class CMRootRegions VALUE_OBJ_CLASS_SPEC {
307 private:
308 YoungList* _young_list;
309 ConcurrentMark* _cm;
311 volatile bool _scan_in_progress;
312 volatile bool _should_abort;
313 HeapRegion* volatile _next_survivor;
315 public:
316 CMRootRegions();
317 // We actually do most of the initialization in this method.
318 void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
320 // Reset the claiming / scanning of the root regions.
321 void prepare_for_scan();
323 // Forces get_next() to return NULL so that the iteration aborts early.
324 void abort() { _should_abort = true; }
326 // Return true if the CM thread are actively scanning root regions,
327 // false otherwise.
328 bool scan_in_progress() { return _scan_in_progress; }
330 // Claim the next root region to scan atomically, or return NULL if
331 // all have been claimed.
332 HeapRegion* claim_next();
334 // Flag that we're done with root region scanning and notify anyone
335 // who's waiting on it. If aborted is false, assume that all regions
336 // have been claimed.
337 void scan_finished();
339 // If CM threads are still scanning root regions, wait until they
340 // are done. Return true if we had to wait, false otherwise.
341 bool wait_until_scan_finished();
342 };
344 class ConcurrentMarkThread;
346 class ConcurrentMark: public CHeapObj<mtGC> {
347 friend class ConcurrentMarkThread;
348 friend class CMTask;
349 friend class CMBitMapClosure;
350 friend class CMGlobalObjectClosure;
351 friend class CMRemarkTask;
352 friend class CMConcurrentMarkingTask;
353 friend class G1ParNoteEndTask;
354 friend class CalcLiveObjectsClosure;
355 friend class G1CMRefProcTaskProxy;
356 friend class G1CMRefProcTaskExecutor;
357 friend class G1CMParKeepAliveAndDrainClosure;
358 friend class G1CMParDrainMarkingStackClosure;
360 protected:
361 ConcurrentMarkThread* _cmThread; // the thread doing the work
362 G1CollectedHeap* _g1h; // the heap.
363 uint _parallel_marking_threads; // the number of marking
364 // threads we're use
365 uint _max_parallel_marking_threads; // max number of marking
366 // threads we'll ever use
367 double _sleep_factor; // how much we have to sleep, with
368 // respect to the work we just did, to
369 // meet the marking overhead goal
370 double _marking_task_overhead; // marking target overhead for
371 // a single task
373 // same as the two above, but for the cleanup task
374 double _cleanup_sleep_factor;
375 double _cleanup_task_overhead;
377 FreeRegionList _cleanup_list;
379 // Concurrent marking support structures
380 CMBitMap _markBitMap1;
381 CMBitMap _markBitMap2;
382 CMBitMapRO* _prevMarkBitMap; // completed mark bitmap
383 CMBitMap* _nextMarkBitMap; // under-construction mark bitmap
385 BitMap _region_bm;
386 BitMap _card_bm;
388 // Heap bounds
389 HeapWord* _heap_start;
390 HeapWord* _heap_end;
392 // Root region tracking and claiming.
393 CMRootRegions _root_regions;
395 // For gray objects
396 CMMarkStack _markStack; // Grey objects behind global finger.
397 HeapWord* volatile _finger; // the global finger, region aligned,
398 // always points to the end of the
399 // last claimed region
401 // marking tasks
402 uint _max_task_num; // maximum task number
403 uint _active_tasks; // task num currently active
404 CMTask** _tasks; // task queue array (max_task_num len)
405 CMTaskQueueSet* _task_queues; // task queue set
406 ParallelTaskTerminator _terminator; // for termination
408 // Two sync barriers that are used to synchronise tasks when an
409 // overflow occurs. The algorithm is the following. All tasks enter
410 // the first one to ensure that they have all stopped manipulating
411 // the global data structures. After they exit it, they re-initialise
412 // their data structures and task 0 re-initialises the global data
413 // structures. Then, they enter the second sync barrier. This
414 // ensure, that no task starts doing work before all data
415 // structures (local and global) have been re-initialised. When they
416 // exit it, they are free to start working again.
417 WorkGangBarrierSync _first_overflow_barrier_sync;
418 WorkGangBarrierSync _second_overflow_barrier_sync;
420 // this is set by any task, when an overflow on the global data
421 // structures is detected.
422 volatile bool _has_overflown;
423 // true: marking is concurrent, false: we're in remark
424 volatile bool _concurrent;
425 // set at the end of a Full GC so that marking aborts
426 volatile bool _has_aborted;
428 // used when remark aborts due to an overflow to indicate that
429 // another concurrent marking phase should start
430 volatile bool _restart_for_overflow;
432 // This is true from the very start of concurrent marking until the
433 // point when all the tasks complete their work. It is really used
434 // to determine the points between the end of concurrent marking and
435 // time of remark.
436 volatile bool _concurrent_marking_in_progress;
438 // verbose level
439 CMVerboseLevel _verbose_level;
441 // All of these times are in ms.
442 NumberSeq _init_times;
443 NumberSeq _remark_times;
444 NumberSeq _remark_mark_times;
445 NumberSeq _remark_weak_ref_times;
446 NumberSeq _cleanup_times;
447 double _total_counting_time;
448 double _total_rs_scrub_time;
450 double* _accum_task_vtime; // accumulated task vtime
452 FlexibleWorkGang* _parallel_workers;
454 ForceOverflowSettings _force_overflow_conc;
455 ForceOverflowSettings _force_overflow_stw;
457 void weakRefsWork(bool clear_all_soft_refs);
459 void swapMarkBitMaps();
461 // It resets the global marking data structures, as well as the
462 // task local ones; should be called during initial mark.
463 void reset();
464 // It resets all the marking data structures.
465 void clear_marking_state(bool clear_overflow = true);
467 // It should be called to indicate which phase we're in (concurrent
468 // mark or remark) and how many threads are currently active.
469 void set_phase(uint active_tasks, bool concurrent);
470 // We do this after we're done with marking so that the marking data
471 // structures are initialised to a sensible and predictable state.
472 void set_non_marking_state();
474 // prints all gathered CM-related statistics
475 void print_stats();
477 bool cleanup_list_is_empty() {
478 return _cleanup_list.is_empty();
479 }
481 // accessor methods
482 uint parallel_marking_threads() { return _parallel_marking_threads; }
483 uint max_parallel_marking_threads() { return _max_parallel_marking_threads;}
484 double sleep_factor() { return _sleep_factor; }
485 double marking_task_overhead() { return _marking_task_overhead;}
486 double cleanup_sleep_factor() { return _cleanup_sleep_factor; }
487 double cleanup_task_overhead() { return _cleanup_task_overhead;}
489 HeapWord* finger() { return _finger; }
490 bool concurrent() { return _concurrent; }
491 uint active_tasks() { return _active_tasks; }
492 ParallelTaskTerminator* terminator() { return &_terminator; }
494 // It claims the next available region to be scanned by a marking
495 // task. It might return NULL if the next region is empty or we have
496 // run out of regions. In the latter case, out_of_regions()
497 // determines whether we've really run out of regions or the task
498 // should call claim_region() again. This might seem a bit
499 // awkward. Originally, the code was written so that claim_region()
500 // either successfully returned with a non-empty region or there
501 // were no more regions to be claimed. The problem with this was
502 // that, in certain circumstances, it iterated over large chunks of
503 // the heap finding only empty regions and, while it was working, it
504 // was preventing the calling task to call its regular clock
505 // method. So, this way, each task will spend very little time in
506 // claim_region() and is allowed to call the regular clock method
507 // frequently.
508 HeapRegion* claim_region(int task);
510 // It determines whether we've run out of regions to scan.
511 bool out_of_regions() { return _finger == _heap_end; }
513 // Returns the task with the given id
514 CMTask* task(int id) {
515 assert(0 <= id && id < (int) _active_tasks,
516 "task id not within active bounds");
517 return _tasks[id];
518 }
520 // Returns the task queue with the given id
521 CMTaskQueue* task_queue(int id) {
522 assert(0 <= id && id < (int) _active_tasks,
523 "task queue id not within active bounds");
524 return (CMTaskQueue*) _task_queues->queue(id);
525 }
527 // Returns the task queue set
528 CMTaskQueueSet* task_queues() { return _task_queues; }
530 // Access / manipulation of the overflow flag which is set to
531 // indicate that the global stack has overflown
532 bool has_overflown() { return _has_overflown; }
533 void set_has_overflown() { _has_overflown = true; }
534 void clear_has_overflown() { _has_overflown = false; }
535 bool restart_for_overflow() { return _restart_for_overflow; }
537 bool has_aborted() { return _has_aborted; }
539 // Methods to enter the two overflow sync barriers
540 void enter_first_sync_barrier(int task_num);
541 void enter_second_sync_barrier(int task_num);
543 ForceOverflowSettings* force_overflow_conc() {
544 return &_force_overflow_conc;
545 }
547 ForceOverflowSettings* force_overflow_stw() {
548 return &_force_overflow_stw;
549 }
551 ForceOverflowSettings* force_overflow() {
552 if (concurrent()) {
553 return force_overflow_conc();
554 } else {
555 return force_overflow_stw();
556 }
557 }
559 // Live Data Counting data structures...
560 // These data structures are initialized at the start of
561 // marking. They are written to while marking is active.
562 // They are aggregated during remark; the aggregated values
563 // are then used to populate the _region_bm, _card_bm, and
564 // the total live bytes, which are then subsequently updated
565 // during cleanup.
567 // An array of bitmaps (one bit map per task). Each bitmap
568 // is used to record the cards spanned by the live objects
569 // marked by that task/worker.
570 BitMap* _count_card_bitmaps;
572 // Used to record the number of marked live bytes
573 // (for each region, by worker thread).
574 size_t** _count_marked_bytes;
576 // Card index of the bottom of the G1 heap. Used for biasing indices into
577 // the card bitmaps.
578 intptr_t _heap_bottom_card_num;
580 public:
581 // Manipulation of the global mark stack.
582 // Notice that the first mark_stack_push is CAS-based, whereas the
583 // two below are Mutex-based. This is OK since the first one is only
584 // called during evacuation pauses and doesn't compete with the
585 // other two (which are called by the marking tasks during
586 // concurrent marking or remark).
587 bool mark_stack_push(oop p) {
588 _markStack.par_push(p);
589 if (_markStack.overflow()) {
590 set_has_overflown();
591 return false;
592 }
593 return true;
594 }
595 bool mark_stack_push(oop* arr, int n) {
596 _markStack.par_push_arr(arr, n);
597 if (_markStack.overflow()) {
598 set_has_overflown();
599 return false;
600 }
601 return true;
602 }
603 void mark_stack_pop(oop* arr, int max, int* n) {
604 _markStack.par_pop_arr(arr, max, n);
605 }
606 size_t mark_stack_size() { return _markStack.size(); }
607 size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; }
608 bool mark_stack_overflow() { return _markStack.overflow(); }
609 bool mark_stack_empty() { return _markStack.isEmpty(); }
611 CMRootRegions* root_regions() { return &_root_regions; }
613 bool concurrent_marking_in_progress() {
614 return _concurrent_marking_in_progress;
615 }
616 void set_concurrent_marking_in_progress() {
617 _concurrent_marking_in_progress = true;
618 }
619 void clear_concurrent_marking_in_progress() {
620 _concurrent_marking_in_progress = false;
621 }
623 void update_accum_task_vtime(int i, double vtime) {
624 _accum_task_vtime[i] += vtime;
625 }
627 double all_task_accum_vtime() {
628 double ret = 0.0;
629 for (int i = 0; i < (int)_max_task_num; ++i)
630 ret += _accum_task_vtime[i];
631 return ret;
632 }
634 // Attempts to steal an object from the task queues of other tasks
635 bool try_stealing(int task_num, int* hash_seed, oop& obj) {
636 return _task_queues->steal(task_num, hash_seed, obj);
637 }
639 ConcurrentMark(ReservedSpace rs, uint max_regions);
640 ~ConcurrentMark();
642 ConcurrentMarkThread* cmThread() { return _cmThread; }
644 CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
645 CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; }
647 // Returns the number of GC threads to be used in a concurrent
648 // phase based on the number of GC threads being used in a STW
649 // phase.
650 uint scale_parallel_threads(uint n_par_threads);
652 // Calculates the number of GC threads to be used in a concurrent phase.
653 uint calc_parallel_marking_threads();
655 // The following three are interaction between CM and
656 // G1CollectedHeap
658 // This notifies CM that a root during initial-mark needs to be
659 // grayed. It is MT-safe. word_size is the size of the object in
660 // words. It is passed explicitly as sometimes we cannot calculate
661 // it from the given object because it might be in an inconsistent
662 // state (e.g., in to-space and being copied). So the caller is
663 // responsible for dealing with this issue (e.g., get the size from
664 // the from-space image when the to-space image might be
665 // inconsistent) and always passing the size. hr is the region that
666 // contains the object and it's passed optionally from callers who
667 // might already have it (no point in recalculating it).
668 inline void grayRoot(oop obj, size_t word_size,
669 uint worker_id, HeapRegion* hr = NULL);
671 // It iterates over the heap and for each object it comes across it
672 // will dump the contents of its reference fields, as well as
673 // liveness information for the object and its referents. The dump
674 // will be written to a file with the following name:
675 // G1PrintReachableBaseFile + "." + str.
676 // vo decides whether the prev (vo == UsePrevMarking), the next
677 // (vo == UseNextMarking) marking information, or the mark word
678 // (vo == UseMarkWord) will be used to determine the liveness of
679 // each object / referent.
680 // If all is true, all objects in the heap will be dumped, otherwise
681 // only the live ones. In the dump the following symbols / breviations
682 // are used:
683 // M : an explicitly live object (its bitmap bit is set)
684 // > : an implicitly live object (over tams)
685 // O : an object outside the G1 heap (typically: in the perm gen)
686 // NOT : a reference field whose referent is not live
687 // AND MARKED : indicates that an object is both explicitly and
688 // implicitly live (it should be one or the other, not both)
689 void print_reachable(const char* str,
690 VerifyOption vo, bool all) PRODUCT_RETURN;
692 // Clear the next marking bitmap (will be called concurrently).
693 void clearNextBitmap();
695 // These two do the work that needs to be done before and after the
696 // initial root checkpoint. Since this checkpoint can be done at two
697 // different points (i.e. an explicit pause or piggy-backed on a
698 // young collection), then it's nice to be able to easily share the
699 // pre/post code. It might be the case that we can put everything in
700 // the post method. TP
701 void checkpointRootsInitialPre();
702 void checkpointRootsInitialPost();
704 // Scan all the root regions and mark everything reachable from
705 // them.
706 void scanRootRegions();
708 // Scan a single root region and mark everything reachable from it.
709 void scanRootRegion(HeapRegion* hr, uint worker_id);
711 // Do concurrent phase of marking, to a tentative transitive closure.
712 void markFromRoots();
714 void checkpointRootsFinal(bool clear_all_soft_refs);
715 void checkpointRootsFinalWork();
716 void cleanup();
717 void completeCleanup();
719 // Mark in the previous bitmap. NB: this is usually read-only, so use
720 // this carefully!
721 inline void markPrev(oop p);
723 // Clears marks for all objects in the given range, for the prev,
724 // next, or both bitmaps. NB: the previous bitmap is usually
725 // read-only, so use this carefully!
726 void clearRangePrevBitmap(MemRegion mr);
727 void clearRangeNextBitmap(MemRegion mr);
728 void clearRangeBothBitmaps(MemRegion mr);
730 // Notify data structures that a GC has started.
731 void note_start_of_gc() {
732 _markStack.note_start_of_gc();
733 }
735 // Notify data structures that a GC is finished.
736 void note_end_of_gc() {
737 _markStack.note_end_of_gc();
738 }
740 // Verify that there are no CSet oops on the stacks (taskqueues /
741 // global mark stack), enqueued SATB buffers, per-thread SATB
742 // buffers, and fingers (global / per-task). The boolean parameters
743 // decide which of the above data structures to verify. If marking
744 // is not in progress, it's a no-op.
745 void verify_no_cset_oops(bool verify_stacks,
746 bool verify_enqueued_buffers,
747 bool verify_thread_buffers,
748 bool verify_fingers) PRODUCT_RETURN;
750 // It is called at the end of an evacuation pause during marking so
751 // that CM is notified of where the new end of the heap is. It
752 // doesn't do anything if concurrent_marking_in_progress() is false,
753 // unless the force parameter is true.
754 void update_g1_committed(bool force = false);
756 bool isMarked(oop p) const {
757 assert(p != NULL && p->is_oop(), "expected an oop");
758 HeapWord* addr = (HeapWord*)p;
759 assert(addr >= _nextMarkBitMap->startWord() ||
760 addr < _nextMarkBitMap->endWord(), "in a region");
762 return _nextMarkBitMap->isMarked(addr);
763 }
765 inline bool not_yet_marked(oop p) const;
767 // XXX Debug code
768 bool containing_card_is_marked(void* p);
769 bool containing_cards_are_marked(void* start, void* last);
771 bool isPrevMarked(oop p) const {
772 assert(p != NULL && p->is_oop(), "expected an oop");
773 HeapWord* addr = (HeapWord*)p;
774 assert(addr >= _prevMarkBitMap->startWord() ||
775 addr < _prevMarkBitMap->endWord(), "in a region");
777 return _prevMarkBitMap->isMarked(addr);
778 }
780 inline bool do_yield_check(uint worker_i = 0);
781 inline bool should_yield();
783 // Called to abort the marking cycle after a Full GC takes palce.
784 void abort();
786 // This prints the global/local fingers. It is used for debugging.
787 NOT_PRODUCT(void print_finger();)
789 void print_summary_info();
791 void print_worker_threads_on(outputStream* st) const;
793 // The following indicate whether a given verbose level has been
794 // set. Notice that anything above stats is conditional to
795 // _MARKING_VERBOSE_ having been set to 1
796 bool verbose_stats() {
797 return _verbose_level >= stats_verbose;
798 }
799 bool verbose_low() {
800 return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
801 }
802 bool verbose_medium() {
803 return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
804 }
805 bool verbose_high() {
806 return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
807 }
809 // Counting data structure accessors
811 // Returns the card number of the bottom of the G1 heap.
812 // Used in biasing indices into accounting card bitmaps.
813 intptr_t heap_bottom_card_num() const {
814 return _heap_bottom_card_num;
815 }
817 // Returns the card bitmap for a given task or worker id.
818 BitMap* count_card_bitmap_for(uint worker_id) {
819 assert(0 <= worker_id && worker_id < _max_task_num, "oob");
820 assert(_count_card_bitmaps != NULL, "uninitialized");
821 BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
822 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
823 return task_card_bm;
824 }
826 // Returns the array containing the marked bytes for each region,
827 // for the given worker or task id.
828 size_t* count_marked_bytes_array_for(uint worker_id) {
829 assert(0 <= worker_id && worker_id < _max_task_num, "oob");
830 assert(_count_marked_bytes != NULL, "uninitialized");
831 size_t* marked_bytes_array = _count_marked_bytes[worker_id];
832 assert(marked_bytes_array != NULL, "uninitialized");
833 return marked_bytes_array;
834 }
836 // Returns the index in the liveness accounting card table bitmap
837 // for the given address
838 inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
840 // Counts the size of the given memory region in the the given
841 // marked_bytes array slot for the given HeapRegion.
842 // Sets the bits in the given card bitmap that are associated with the
843 // cards that are spanned by the memory region.
844 inline void count_region(MemRegion mr, HeapRegion* hr,
845 size_t* marked_bytes_array,
846 BitMap* task_card_bm);
848 // Counts the given memory region in the task/worker counting
849 // data structures for the given worker id.
850 inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
852 // Counts the given memory region in the task/worker counting
853 // data structures for the given worker id.
854 inline void count_region(MemRegion mr, uint worker_id);
856 // Counts the given object in the given task/worker counting
857 // data structures.
858 inline void count_object(oop obj, HeapRegion* hr,
859 size_t* marked_bytes_array,
860 BitMap* task_card_bm);
862 // Counts the given object in the task/worker counting data
863 // structures for the given worker id.
864 inline void count_object(oop obj, HeapRegion* hr, uint worker_id);
866 // Attempts to mark the given object and, if successful, counts
867 // the object in the given task/worker counting structures.
868 inline bool par_mark_and_count(oop obj, HeapRegion* hr,
869 size_t* marked_bytes_array,
870 BitMap* task_card_bm);
872 // Attempts to mark the given object and, if successful, counts
873 // the object in the task/worker counting structures for the
874 // given worker id.
875 inline bool par_mark_and_count(oop obj, size_t word_size,
876 HeapRegion* hr, uint worker_id);
878 // Attempts to mark the given object and, if successful, counts
879 // the object in the task/worker counting structures for the
880 // given worker id.
881 inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);
883 // Similar to the above routine but we don't know the heap region that
884 // contains the object to be marked/counted, which this routine looks up.
885 inline bool par_mark_and_count(oop obj, uint worker_id);
887 // Similar to the above routine but there are times when we cannot
888 // safely calculate the size of obj due to races and we, therefore,
889 // pass the size in as a parameter. It is the caller's reponsibility
890 // to ensure that the size passed in for obj is valid.
891 inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);
893 // Unconditionally mark the given object, and unconditinally count
894 // the object in the counting structures for worker id 0.
895 // Should *not* be called from parallel code.
896 inline bool mark_and_count(oop obj, HeapRegion* hr);
898 // Similar to the above routine but we don't know the heap region that
899 // contains the object to be marked/counted, which this routine looks up.
900 // Should *not* be called from parallel code.
901 inline bool mark_and_count(oop obj);
903 protected:
904 // Clear all the per-task bitmaps and arrays used to store the
905 // counting data.
906 void clear_all_count_data();
908 // Aggregates the counting data for each worker/task
909 // that was constructed while marking. Also sets
910 // the amount of marked bytes for each region and
911 // the top at concurrent mark count.
912 void aggregate_count_data();
914 // Verification routine
915 void verify_count_data();
916 };
918 // A class representing a marking task.
919 class CMTask : public TerminatorTerminator {
920 private:
921 enum PrivateConstants {
922 // the regular clock call is called once the scanned words reaches
923 // this limit
924 words_scanned_period = 12*1024,
925 // the regular clock call is called once the number of visited
926 // references reaches this limit
927 refs_reached_period = 384,
928 // initial value for the hash seed, used in the work stealing code
929 init_hash_seed = 17,
930 // how many entries will be transferred between global stack and
931 // local queues
932 global_stack_transfer_size = 16
933 };
935 int _task_id;
936 G1CollectedHeap* _g1h;
937 ConcurrentMark* _cm;
938 CMBitMap* _nextMarkBitMap;
939 // the task queue of this task
940 CMTaskQueue* _task_queue;
941 private:
942 // the task queue set---needed for stealing
943 CMTaskQueueSet* _task_queues;
944 // indicates whether the task has been claimed---this is only for
945 // debugging purposes
946 bool _claimed;
948 // number of calls to this task
949 int _calls;
951 // when the virtual timer reaches this time, the marking step should
952 // exit
953 double _time_target_ms;
954 // the start time of the current marking step
955 double _start_time_ms;
957 // the oop closure used for iterations over oops
958 G1CMOopClosure* _cm_oop_closure;
960 // the region this task is scanning, NULL if we're not scanning any
961 HeapRegion* _curr_region;
962 // the local finger of this task, NULL if we're not scanning a region
963 HeapWord* _finger;
964 // limit of the region this task is scanning, NULL if we're not scanning one
965 HeapWord* _region_limit;
967 // the number of words this task has scanned
968 size_t _words_scanned;
969 // When _words_scanned reaches this limit, the regular clock is
970 // called. Notice that this might be decreased under certain
971 // circumstances (i.e. when we believe that we did an expensive
972 // operation).
973 size_t _words_scanned_limit;
974 // the initial value of _words_scanned_limit (i.e. what it was
975 // before it was decreased).
976 size_t _real_words_scanned_limit;
978 // the number of references this task has visited
979 size_t _refs_reached;
980 // When _refs_reached reaches this limit, the regular clock is
981 // called. Notice this this might be decreased under certain
982 // circumstances (i.e. when we believe that we did an expensive
983 // operation).
984 size_t _refs_reached_limit;
985 // the initial value of _refs_reached_limit (i.e. what it was before
986 // it was decreased).
987 size_t _real_refs_reached_limit;
989 // used by the work stealing stuff
990 int _hash_seed;
991 // if this is true, then the task has aborted for some reason
992 bool _has_aborted;
993 // set when the task aborts because it has met its time quota
994 bool _has_timed_out;
995 // true when we're draining SATB buffers; this avoids the task
996 // aborting due to SATB buffers being available (as we're already
997 // dealing with them)
998 bool _draining_satb_buffers;
1000 // number sequence of past step times
1001 NumberSeq _step_times_ms;
1002 // elapsed time of this task
1003 double _elapsed_time_ms;
1004 // termination time of this task
1005 double _termination_time_ms;
1006 // when this task got into the termination protocol
1007 double _termination_start_time_ms;
1009 // true when the task is during a concurrent phase, false when it is
1010 // in the remark phase (so, in the latter case, we do not have to
1011 // check all the things that we have to check during the concurrent
1012 // phase, i.e. SATB buffer availability...)
1013 bool _concurrent;
1015 TruncatedSeq _marking_step_diffs_ms;
1017 // Counting data structures. Embedding the task's marked_bytes_array
1018 // and card bitmap into the actual task saves having to go through
1019 // the ConcurrentMark object.
1020 size_t* _marked_bytes_array;
1021 BitMap* _card_bm;
1023 // LOTS of statistics related with this task
1024 #if _MARKING_STATS_
1025 NumberSeq _all_clock_intervals_ms;
1026 double _interval_start_time_ms;
1028 int _aborted;
1029 int _aborted_overflow;
1030 int _aborted_cm_aborted;
1031 int _aborted_yield;
1032 int _aborted_timed_out;
1033 int _aborted_satb;
1034 int _aborted_termination;
1036 int _steal_attempts;
1037 int _steals;
1039 int _clock_due_to_marking;
1040 int _clock_due_to_scanning;
1042 int _local_pushes;
1043 int _local_pops;
1044 int _local_max_size;
1045 int _objs_scanned;
1047 int _global_pushes;
1048 int _global_pops;
1049 int _global_max_size;
1051 int _global_transfers_to;
1052 int _global_transfers_from;
1054 int _regions_claimed;
1055 int _objs_found_on_bitmap;
1057 int _satb_buffers_processed;
1058 #endif // _MARKING_STATS_
1060 // it updates the local fields after this task has claimed
1061 // a new region to scan
1062 void setup_for_region(HeapRegion* hr);
1063 // it brings up-to-date the limit of the region
1064 void update_region_limit();
1066 // called when either the words scanned or the refs visited limit
1067 // has been reached
1068 void reached_limit();
1069 // recalculates the words scanned and refs visited limits
1070 void recalculate_limits();
1071 // decreases the words scanned and refs visited limits when we reach
1072 // an expensive operation
1073 void decrease_limits();
1074 // it checks whether the words scanned or refs visited reached their
1075 // respective limit and calls reached_limit() if they have
1076 void check_limits() {
1077 if (_words_scanned >= _words_scanned_limit ||
1078 _refs_reached >= _refs_reached_limit) {
1079 reached_limit();
1080 }
1081 }
1082 // this is supposed to be called regularly during a marking step as
1083 // it checks a bunch of conditions that might cause the marking step
1084 // to abort
1085 void regular_clock_call();
1086 bool concurrent() { return _concurrent; }
1088 public:
1089 // It resets the task; it should be called right at the beginning of
1090 // a marking phase.
1091 void reset(CMBitMap* _nextMarkBitMap);
1092 // it clears all the fields that correspond to a claimed region.
1093 void clear_region_fields();
1095 void set_concurrent(bool concurrent) { _concurrent = concurrent; }
1097 // The main method of this class which performs a marking step
1098 // trying not to exceed the given duration. However, it might exit
1099 // prematurely, according to some conditions (i.e. SATB buffers are
1100 // available for processing).
1101 void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
1103 // These two calls start and stop the timer
1104 void record_start_time() {
1105 _elapsed_time_ms = os::elapsedTime() * 1000.0;
1106 }
1107 void record_end_time() {
1108 _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
1109 }
1111 // returns the task ID
1112 int task_id() { return _task_id; }
1114 // From TerminatorTerminator. It determines whether this task should
1115 // exit the termination protocol after it's entered it.
1116 virtual bool should_exit_termination();
1118 // Resets the local region fields after a task has finished scanning a
1119 // region; or when they have become stale as a result of the region
1120 // being evacuated.
1121 void giveup_current_region();
1123 HeapWord* finger() { return _finger; }
1125 bool has_aborted() { return _has_aborted; }
1126 void set_has_aborted() { _has_aborted = true; }
1127 void clear_has_aborted() { _has_aborted = false; }
1128 bool has_timed_out() { return _has_timed_out; }
1129 bool claimed() { return _claimed; }
1131 void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
1133 // It grays the object by marking it and, if necessary, pushing it
1134 // on the local queue
1135 inline void deal_with_reference(oop obj);
1137 // It scans an object and visits its children.
1138 void scan_object(oop obj);
1140 // It pushes an object on the local queue.
1141 inline void push(oop obj);
1143 // These two move entries to/from the global stack.
1144 void move_entries_to_global_stack();
1145 void get_entries_from_global_stack();
1147 // It pops and scans objects from the local queue. If partially is
1148 // true, then it stops when the queue size is of a given limit. If
1149 // partially is false, then it stops when the queue is empty.
1150 void drain_local_queue(bool partially);
1151 // It moves entries from the global stack to the local queue and
1152 // drains the local queue. If partially is true, then it stops when
1153 // both the global stack and the local queue reach a given size. If
1154 // partially if false, it tries to empty them totally.
1155 void drain_global_stack(bool partially);
1156 // It keeps picking SATB buffers and processing them until no SATB
1157 // buffers are available.
1158 void drain_satb_buffers();
1160 // moves the local finger to a new location
1161 inline void move_finger_to(HeapWord* new_finger) {
1162 assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
1163 _finger = new_finger;
1164 }
1166 CMTask(int task_num, ConcurrentMark *cm,
1167 size_t* marked_bytes, BitMap* card_bm,
1168 CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1170 // it prints statistics associated with this task
1171 void print_stats();
1173 #if _MARKING_STATS_
1174 void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1175 #endif // _MARKING_STATS_
1176 };
1178 // Class that's used to to print out per-region liveness
1179 // information. It's currently used at the end of marking and also
1180 // after we sort the old regions at the end of the cleanup operation.
1181 class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
1182 private:
1183 outputStream* _out;
1185 // Accumulators for these values.
1186 size_t _total_used_bytes;
1187 size_t _total_capacity_bytes;
1188 size_t _total_prev_live_bytes;
1189 size_t _total_next_live_bytes;
1191 // These are set up when we come across a "stars humongous" region
1192 // (as this is where most of this information is stored, not in the
1193 // subsequent "continues humongous" regions). After that, for every
1194 // region in a given humongous region series we deduce the right
1195 // values for it by simply subtracting the appropriate amount from
1196 // these fields. All these values should reach 0 after we've visited
1197 // the last region in the series.
1198 size_t _hum_used_bytes;
1199 size_t _hum_capacity_bytes;
1200 size_t _hum_prev_live_bytes;
1201 size_t _hum_next_live_bytes;
1203 static double perc(size_t val, size_t total) {
1204 if (total == 0) {
1205 return 0.0;
1206 } else {
1207 return 100.0 * ((double) val / (double) total);
1208 }
1209 }
1211 static double bytes_to_mb(size_t val) {
1212 return (double) val / (double) M;
1213 }
1215 // See the .cpp file.
1216 size_t get_hum_bytes(size_t* hum_bytes);
1217 void get_hum_bytes(size_t* used_bytes, size_t* capacity_bytes,
1218 size_t* prev_live_bytes, size_t* next_live_bytes);
1220 public:
1221 // The header and footer are printed in the constructor and
1222 // destructor respectively.
1223 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name);
1224 virtual bool doHeapRegion(HeapRegion* r);
1225 ~G1PrintRegionLivenessInfoClosure();
1226 };
1228 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP