1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Jun 05 15:57:56 2008 -0700 1.3 @@ -0,0 +1,1049 @@ 1.4 +/* 1.5 + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +class G1CollectedHeap; 1.29 +class CMTask; 1.30 +typedef GenericTaskQueue<oop> CMTaskQueue; 1.31 +typedef GenericTaskQueueSet<oop> CMTaskQueueSet; 1.32 + 1.33 +// A generic CM bit map. This is essentially a wrapper around the BitMap 1.34 +// class, with one bit per (1<<_shifter) HeapWords. 
1.35 + 1.36 +class CMBitMapRO { 1.37 + protected: 1.38 + HeapWord* _bmStartWord; // base address of range covered by map 1.39 + size_t _bmWordSize; // map size (in #HeapWords covered) 1.40 + const int _shifter; // map to char or bit 1.41 + VirtualSpace _virtual_space; // underlying the bit map 1.42 + BitMap _bm; // the bit map itself 1.43 + 1.44 + public: 1.45 + // constructor 1.46 + CMBitMapRO(ReservedSpace rs, int shifter); 1.47 + 1.48 + enum { do_yield = true }; 1.49 + 1.50 + // inquiries 1.51 + HeapWord* startWord() const { return _bmStartWord; } 1.52 + size_t sizeInWords() const { return _bmWordSize; } 1.53 + // the following is one past the last word in space 1.54 + HeapWord* endWord() const { return _bmStartWord + _bmWordSize; } 1.55 + 1.56 + // read marks 1.57 + 1.58 + bool isMarked(HeapWord* addr) const { 1.59 + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), 1.60 + "outside underlying space?"); 1.61 + return _bm.at(heapWordToOffset(addr)); 1.62 + } 1.63 + 1.64 + // iteration 1.65 + bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); } 1.66 + bool iterate(BitMapClosure* cl, MemRegion mr); 1.67 + 1.68 + // Return the address corresponding to the next marked bit at or after 1.69 + // "addr", and before "limit", if "limit" is non-NULL. If there is no 1.70 + // such bit, returns "limit" if that is non-NULL, or else "endWord()". 1.71 + HeapWord* getNextMarkedWordAddress(HeapWord* addr, 1.72 + HeapWord* limit = NULL) const; 1.73 + // Return the address corresponding to the next unmarked bit at or after 1.74 + // "addr", and before "limit", if "limit" is non-NULL. If there is no 1.75 + // such bit, returns "limit" if that is non-NULL, or else "endWord()". 1.76 + HeapWord* getNextUnmarkedWordAddress(HeapWord* addr, 1.77 + HeapWord* limit = NULL) const; 1.78 + 1.79 + // conversion utilities 1.80 + // XXX Fix these so that offsets are size_t's... 
1.81 + HeapWord* offsetToHeapWord(size_t offset) const { 1.82 + return _bmStartWord + (offset << _shifter); 1.83 + } 1.84 + size_t heapWordToOffset(HeapWord* addr) const { 1.85 + return pointer_delta(addr, _bmStartWord) >> _shifter; 1.86 + } 1.87 + int heapWordDiffToOffsetDiff(size_t diff) const; 1.88 + HeapWord* nextWord(HeapWord* addr) { 1.89 + return offsetToHeapWord(heapWordToOffset(addr) + 1); 1.90 + } 1.91 + 1.92 + void mostly_disjoint_range_union(BitMap* from_bitmap, 1.93 + size_t from_start_index, 1.94 + HeapWord* to_start_word, 1.95 + size_t word_num); 1.96 + 1.97 + // debugging 1.98 + NOT_PRODUCT(bool covers(ReservedSpace rs) const;) 1.99 +}; 1.100 + 1.101 +class CMBitMap : public CMBitMapRO { 1.102 + 1.103 + public: 1.104 + // constructor 1.105 + CMBitMap(ReservedSpace rs, int shifter) : 1.106 + CMBitMapRO(rs, shifter) {} 1.107 + 1.108 + // write marks 1.109 + void mark(HeapWord* addr) { 1.110 + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), 1.111 + "outside underlying space?"); 1.112 + _bm.at_put(heapWordToOffset(addr), true); 1.113 + } 1.114 + void clear(HeapWord* addr) { 1.115 + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), 1.116 + "outside underlying space?"); 1.117 + _bm.at_put(heapWordToOffset(addr), false); 1.118 + } 1.119 + bool parMark(HeapWord* addr) { 1.120 + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), 1.121 + "outside underlying space?"); 1.122 + return _bm.par_at_put(heapWordToOffset(addr), true); 1.123 + } 1.124 + bool parClear(HeapWord* addr) { 1.125 + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), 1.126 + "outside underlying space?"); 1.127 + return _bm.par_at_put(heapWordToOffset(addr), false); 1.128 + } 1.129 + void markRange(MemRegion mr); 1.130 + void clearAll(); 1.131 + void clearRange(MemRegion mr); 1.132 + 1.133 + // Starting at the bit corresponding to "addr" (inclusive), find the next 1.134 + // "1" bit, if any. 
This bit starts some run of consecutive "1"'s; find 1.135 + // the end of this run (stopping at "end_addr"). Return the MemRegion 1.136 + // covering from the start of the region corresponding to the first bit 1.137 + // of the run to the end of the region corresponding to the last bit of 1.138 + // the run. If there is no "1" bit at or after "addr", return an empty 1.139 + // MemRegion. 1.140 + MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr); 1.141 +}; 1.142 + 1.143 +// Represents a marking stack used by the CM collector. 1.144 +// Ideally this should be GrowableArray<> just like MSC's marking stack(s). 1.145 +class CMMarkStack { 1.146 + ConcurrentMark* _cm; 1.147 + oop* _base; // bottom of stack 1.148 + jint _index; // one more than last occupied index 1.149 + jint _capacity; // max #elements 1.150 + jint _oops_do_bound; // Number of elements to include in next iteration. 1.151 + NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run 1.152 + 1.153 + bool _overflow; 1.154 + DEBUG_ONLY(bool _drain_in_progress;) 1.155 + DEBUG_ONLY(bool _drain_in_progress_yields;) 1.156 + 1.157 + public: 1.158 + CMMarkStack(ConcurrentMark* cm); 1.159 + ~CMMarkStack(); 1.160 + 1.161 + void allocate(size_t size); 1.162 + 1.163 + oop pop() { 1.164 + if (!isEmpty()) { 1.165 + return _base[--_index] ; 1.166 + } 1.167 + return NULL; 1.168 + } 1.169 + 1.170 + // If overflow happens, don't do the push, and record the overflow. 1.171 + // *Requires* that "ptr" is already marked. 1.172 + void push(oop ptr) { 1.173 + if (isFull()) { 1.174 + // Record overflow. 1.175 + _overflow = true; 1.176 + return; 1.177 + } else { 1.178 + _base[_index++] = ptr; 1.179 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index)); 1.180 + } 1.181 + } 1.182 + // Non-block impl. Note: concurrency is allowed only with other 1.183 + // "par_push" operations, not with "pop" or "drain". We would need 1.184 + // parallel versions of them if such concurrency was desired. 
1.185 + void par_push(oop ptr); 1.186 + 1.187 + // Pushes the first "n" elements of "ptr_arr" on the stack. 1.188 + // Non-block impl. Note: concurrency is allowed only with other 1.189 + // "par_adjoin_arr" or "push" operations, not with "pop" or "drain". 1.190 + void par_adjoin_arr(oop* ptr_arr, int n); 1.191 + 1.192 + // Pushes the first "n" elements of "ptr_arr" on the stack. 1.193 + // Locking impl: concurrency is allowed only with 1.194 + // "par_push_arr" and/or "par_pop_arr" operations, which use the same 1.195 + // locking strategy. 1.196 + void par_push_arr(oop* ptr_arr, int n); 1.197 + 1.198 + // If returns false, the array was empty. Otherwise, removes up to "max" 1.199 + // elements from the stack, and transfers them to "ptr_arr" in an 1.200 + // unspecified order. The actual number transferred is given in "n" ("n 1.201 + // == 0" is deliberately redundant with the return value.) Locking impl: 1.202 + // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr" 1.203 + // operations, which use the same locking strategy. 1.204 + bool par_pop_arr(oop* ptr_arr, int max, int* n); 1.205 + 1.206 + // Drain the mark stack, applying the given closure to all fields of 1.207 + // objects on the stack. (That is, continue until the stack is empty, 1.208 + // even if closure applications add entries to the stack.) The "bm" 1.209 + // argument, if non-null, may be used to verify that only marked objects 1.210 + // are on the mark stack. If "yield_after" is "true", then the 1.211 + // concurrent marker performing the drain offers to yield after 1.212 + // processing each object. If a yield occurs, stops the drain operation 1.213 + // and returns false. Otherwise, returns true. 
1.214 + template<class OopClosureClass> 1.215 + bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false); 1.216 + 1.217 + bool isEmpty() { return _index == 0; } 1.218 + bool isFull() { return _index == _capacity; } 1.219 + int maxElems() { return _capacity; } 1.220 + 1.221 + bool overflow() { return _overflow; } 1.222 + void clear_overflow() { _overflow = false; } 1.223 + 1.224 + int size() { return _index; } 1.225 + 1.226 + void setEmpty() { _index = 0; clear_overflow(); } 1.227 + 1.228 + // Record the current size; a subsequent "oops_do" will iterate only over 1.229 + // indices valid at the time of this call. 1.230 + void set_oops_do_bound(jint bound = -1) { 1.231 + if (bound == -1) { 1.232 + _oops_do_bound = _index; 1.233 + } else { 1.234 + _oops_do_bound = bound; 1.235 + } 1.236 + } 1.237 + jint oops_do_bound() { return _oops_do_bound; } 1.238 + // iterate over the oops in the mark stack, up to the bound recorded via 1.239 + // the call above. 1.240 + void oops_do(OopClosure* f); 1.241 +}; 1.242 + 1.243 +class CMRegionStack { 1.244 + MemRegion* _base; 1.245 + jint _capacity; 1.246 + jint _index; 1.247 + jint _oops_do_bound; 1.248 + bool _overflow; 1.249 +public: 1.250 + CMRegionStack(); 1.251 + ~CMRegionStack(); 1.252 + void allocate(size_t size); 1.253 + 1.254 + // This is lock-free; assumes that it will only be called in parallel 1.255 + // with other "push" operations (no pops). 1.256 + void push(MemRegion mr); 1.257 + 1.258 + // Lock-free; assumes that it will only be called in parallel 1.259 + // with other "pop" operations (no pushes). 1.260 + MemRegion pop(); 1.261 + 1.262 + bool isEmpty() { return _index == 0; } 1.263 + bool isFull() { return _index == _capacity; } 1.264 + 1.265 + bool overflow() { return _overflow; } 1.266 + void clear_overflow() { _overflow = false; } 1.267 + 1.268 + int size() { return _index; } 1.269 + 1.270 + // It iterates over the entries in the region stack and it 1.271 + // invalidates (i.e. 
assigns MemRegion()) the ones that point to 1.272 + // regions in the collection set. 1.273 + bool invalidate_entries_into_cset(); 1.274 + 1.275 + // This gives an upper bound up to which the iteration in 1.276 + // invalidate_entries_into_cset() will reach. This prevents 1.277 + // newly-added entries to be unnecessarily scanned. 1.278 + void set_oops_do_bound() { 1.279 + _oops_do_bound = _index; 1.280 + } 1.281 + 1.282 + void setEmpty() { _index = 0; clear_overflow(); } 1.283 +}; 1.284 + 1.285 +// this will enable a variety of different statistics per GC task 1.286 +#define _MARKING_STATS_ 0 1.287 +// this will enable the higher verbose levels 1.288 +#define _MARKING_VERBOSE_ 0 1.289 + 1.290 +#if _MARKING_STATS_ 1.291 +#define statsOnly(statement) \ 1.292 +do { \ 1.293 + statement ; \ 1.294 +} while (0) 1.295 +#else // _MARKING_STATS_ 1.296 +#define statsOnly(statement) \ 1.297 +do { \ 1.298 +} while (0) 1.299 +#endif // _MARKING_STATS_ 1.300 + 1.301 +// Some extra guarantees that I like to also enable in optimised mode 1.302 +// when debugging. 
If you want to enable them, comment out the assert 1.303 +// macro and uncomment out the guaratee macro 1.304 +// #define tmp_guarantee_CM(expr, str) guarantee(expr, str) 1.305 +#define tmp_guarantee_CM(expr, str) assert(expr, str) 1.306 + 1.307 +typedef enum { 1.308 + no_verbose = 0, // verbose turned off 1.309 + stats_verbose, // only prints stats at the end of marking 1.310 + low_verbose, // low verbose, mostly per region and per major event 1.311 + medium_verbose, // a bit more detailed than low 1.312 + high_verbose // per object verbose 1.313 +} CMVerboseLevel; 1.314 + 1.315 + 1.316 +class ConcurrentMarkThread; 1.317 + 1.318 +class ConcurrentMark { 1.319 + friend class ConcurrentMarkThread; 1.320 + friend class CMTask; 1.321 + friend class CMBitMapClosure; 1.322 + friend class CSMarkOopClosure; 1.323 + friend class CMGlobalObjectClosure; 1.324 + friend class CMRemarkTask; 1.325 + friend class CMConcurrentMarkingTask; 1.326 + friend class G1ParNoteEndTask; 1.327 + friend class CalcLiveObjectsClosure; 1.328 + 1.329 +protected: 1.330 + ConcurrentMarkThread* _cmThread; // the thread doing the work 1.331 + G1CollectedHeap* _g1h; // the heap. 1.332 + size_t _parallel_marking_threads; // the number of marking 1.333 + // threads we'll use 1.334 + double _sleep_factor; // how much we have to sleep, with 1.335 + // respect to the work we just did, to 1.336 + // meet the marking overhead goal 1.337 + double _marking_task_overhead; // marking target overhead for 1.338 + // a single task 1.339 + 1.340 + // same as the two above, but for the cleanup task 1.341 + double _cleanup_sleep_factor; 1.342 + double _cleanup_task_overhead; 1.343 + 1.344 + // Stuff related to age cohort processing. 
1.345 + struct ParCleanupThreadState { 1.346 + char _pre[64]; 1.347 + UncleanRegionList list; 1.348 + char _post[64]; 1.349 + }; 1.350 + ParCleanupThreadState** _par_cleanup_thread_state; 1.351 + 1.352 + // CMS marking support structures 1.353 + CMBitMap _markBitMap1; 1.354 + CMBitMap _markBitMap2; 1.355 + CMBitMapRO* _prevMarkBitMap; // completed mark bitmap 1.356 + CMBitMap* _nextMarkBitMap; // under-construction mark bitmap 1.357 + bool _at_least_one_mark_complete; 1.358 + 1.359 + BitMap _region_bm; 1.360 + BitMap _card_bm; 1.361 + 1.362 + // Heap bounds 1.363 + HeapWord* _heap_start; 1.364 + HeapWord* _heap_end; 1.365 + 1.366 + // For gray objects 1.367 + CMMarkStack _markStack; // Grey objects behind global finger. 1.368 + CMRegionStack _regionStack; // Grey regions behind global finger. 1.369 + HeapWord* volatile _finger; // the global finger, region aligned, 1.370 + // always points to the end of the 1.371 + // last claimed region 1.372 + 1.373 + // marking tasks 1.374 + size_t _max_task_num; // maximum task number 1.375 + size_t _active_tasks; // task num currently active 1.376 + CMTask** _tasks; // task queue array (max_task_num len) 1.377 + CMTaskQueueSet* _task_queues; // task queue set 1.378 + ParallelTaskTerminator _terminator; // for termination 1.379 + 1.380 + // Two sync barriers that are used to synchronise tasks when an 1.381 + // overflow occurs. The algorithm is the following. All tasks enter 1.382 + // the first one to ensure that they have all stopped manipulating 1.383 + // the global data structures. After they exit it, they re-initialise 1.384 + // their data structures and task 0 re-initialises the global data 1.385 + // structures. Then, they enter the second sync barrier. This 1.386 + // ensure, that no task starts doing work before all data 1.387 + // structures (local and global) have been re-initialised. When they 1.388 + // exit it, they are free to start working again. 
1.389 + WorkGangBarrierSync _first_overflow_barrier_sync; 1.390 + WorkGangBarrierSync _second_overflow_barrier_sync; 1.391 + 1.392 + 1.393 + // this is set by any task, when an overflow on the global data 1.394 + // structures is detected. 1.395 + volatile bool _has_overflown; 1.396 + // true: marking is concurrent, false: we're in remark 1.397 + volatile bool _concurrent; 1.398 + // set at the end of a Full GC so that marking aborts 1.399 + volatile bool _has_aborted; 1.400 + // used when remark aborts due to an overflow to indicate that 1.401 + // another concurrent marking phase should start 1.402 + volatile bool _restart_for_overflow; 1.403 + 1.404 + // This is true from the very start of concurrent marking until the 1.405 + // point when all the tasks complete their work. It is really used 1.406 + // to determine the points between the end of concurrent marking and 1.407 + // time of remark. 1.408 + volatile bool _concurrent_marking_in_progress; 1.409 + 1.410 + // verbose level 1.411 + CMVerboseLevel _verbose_level; 1.412 + 1.413 + COTracker _cleanup_co_tracker; 1.414 + 1.415 + // These two fields are used to implement the optimisation that 1.416 + // avoids pushing objects on the global/region stack if there are 1.417 + // no collection set regions above the lowest finger. 1.418 + 1.419 + // This is the lowest finger (among the global and local fingers), 1.420 + // which is calculated before a new collection set is chosen. 1.421 + HeapWord* _min_finger; 1.422 + // If this flag is true, objects/regions that are marked below the 1.423 + // finger should be pushed on the stack(s). If this is flag is 1.424 + // false, it is safe not to push them on the stack(s). 1.425 + bool _should_gray_objects; 1.426 + 1.427 + // All of these times are in ms. 
1.428 + NumberSeq _init_times; 1.429 + NumberSeq _remark_times; 1.430 + NumberSeq _remark_mark_times; 1.431 + NumberSeq _remark_weak_ref_times; 1.432 + NumberSeq _cleanup_times; 1.433 + double _total_counting_time; 1.434 + double _total_rs_scrub_time; 1.435 + 1.436 + double* _accum_task_vtime; // accumulated task vtime 1.437 + 1.438 + WorkGang* _parallel_workers; 1.439 + 1.440 + void weakRefsWork(bool clear_all_soft_refs); 1.441 + 1.442 + void swapMarkBitMaps(); 1.443 + 1.444 + // It resets the global marking data structures, as well as the 1.445 + // task local ones; should be called during initial mark. 1.446 + void reset(); 1.447 + // It resets all the marking data structures. 1.448 + void clear_marking_state(); 1.449 + 1.450 + // It should be called to indicate which phase we're in (concurrent 1.451 + // mark or remark) and how many threads are currently active. 1.452 + void set_phase(size_t active_tasks, bool concurrent); 1.453 + // We do this after we're done with marking so that the marking data 1.454 + // structures are initialised to a sensible and predictable state. 1.455 + void set_non_marking_state(); 1.456 + 1.457 + // prints all gathered CM-related statistics 1.458 + void print_stats(); 1.459 + 1.460 + // accessor methods 1.461 + size_t parallel_marking_threads() { return _parallel_marking_threads; } 1.462 + double sleep_factor() { return _sleep_factor; } 1.463 + double marking_task_overhead() { return _marking_task_overhead;} 1.464 + double cleanup_sleep_factor() { return _cleanup_sleep_factor; } 1.465 + double cleanup_task_overhead() { return _cleanup_task_overhead;} 1.466 + 1.467 + HeapWord* finger() { return _finger; } 1.468 + bool concurrent() { return _concurrent; } 1.469 + size_t active_tasks() { return _active_tasks; } 1.470 + ParallelTaskTerminator* terminator() { return &_terminator; } 1.471 + 1.472 + // It claims the next available region to be scanned by a marking 1.473 + // task. 
It might return NULL if the next region is empty or we have 1.474 + // run out of regions. In the latter case, out_of_regions() 1.475 + // determines whether we've really run out of regions or the task 1.476 + // should call claim_region() again. This might seem a bit 1.477 + // awkward. Originally, the code was written so that claim_region() 1.478 + // either successfully returned with a non-empty region or there 1.479 + // were no more regions to be claimed. The problem with this was 1.480 + // that, in certain circumstances, it iterated over large chunks of 1.481 + // the heap finding only empty regions and, while it was working, it 1.482 + // was preventing the calling task to call its regular clock 1.483 + // method. So, this way, each task will spend very little time in 1.484 + // claim_region() and is allowed to call the regular clock method 1.485 + // frequently. 1.486 + HeapRegion* claim_region(int task); 1.487 + 1.488 + // It determines whether we've run out of regions to scan. 
1.489 + bool out_of_regions() { return _finger == _heap_end; } 1.490 + 1.491 + // Returns the task with the given id 1.492 + CMTask* task(int id) { 1.493 + guarantee( 0 <= id && id < (int) _active_tasks, "task id not within " 1.494 + "active bounds" ); 1.495 + return _tasks[id]; 1.496 + } 1.497 + 1.498 + // Returns the task queue with the given id 1.499 + CMTaskQueue* task_queue(int id) { 1.500 + guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within " 1.501 + "active bounds" ); 1.502 + return (CMTaskQueue*) _task_queues->queue(id); 1.503 + } 1.504 + 1.505 + // Returns the task queue set 1.506 + CMTaskQueueSet* task_queues() { return _task_queues; } 1.507 + 1.508 + // Access / manipulation of the overflow flag which is set to 1.509 + // indicate that the global stack or region stack has overflown 1.510 + bool has_overflown() { return _has_overflown; } 1.511 + void set_has_overflown() { _has_overflown = true; } 1.512 + void clear_has_overflown() { _has_overflown = false; } 1.513 + 1.514 + bool has_aborted() { return _has_aborted; } 1.515 + bool restart_for_overflow() { return _restart_for_overflow; } 1.516 + 1.517 + // Methods to enter the two overflow sync barriers 1.518 + void enter_first_sync_barrier(int task_num); 1.519 + void enter_second_sync_barrier(int task_num); 1.520 + 1.521 +public: 1.522 + // Manipulation of the global mark stack. 1.523 + // Notice that the first mark_stack_push is CAS-based, whereas the 1.524 + // two below are Mutex-based. This is OK since the first one is only 1.525 + // called during evacuation pauses and doesn't compete with the 1.526 + // other two (which are called by the marking tasks during 1.527 + // concurrent marking or remark). 
1.528 + bool mark_stack_push(oop p) { 1.529 + _markStack.par_push(p); 1.530 + if (_markStack.overflow()) { 1.531 + set_has_overflown(); 1.532 + return false; 1.533 + } 1.534 + return true; 1.535 + } 1.536 + bool mark_stack_push(oop* arr, int n) { 1.537 + _markStack.par_push_arr(arr, n); 1.538 + if (_markStack.overflow()) { 1.539 + set_has_overflown(); 1.540 + return false; 1.541 + } 1.542 + return true; 1.543 + } 1.544 + void mark_stack_pop(oop* arr, int max, int* n) { 1.545 + _markStack.par_pop_arr(arr, max, n); 1.546 + } 1.547 + size_t mark_stack_size() { return _markStack.size(); } 1.548 + size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; } 1.549 + bool mark_stack_overflow() { return _markStack.overflow(); } 1.550 + bool mark_stack_empty() { return _markStack.isEmpty(); } 1.551 + 1.552 + // Manipulation of the region stack 1.553 + bool region_stack_push(MemRegion mr) { 1.554 + _regionStack.push(mr); 1.555 + if (_regionStack.overflow()) { 1.556 + set_has_overflown(); 1.557 + return false; 1.558 + } 1.559 + return true; 1.560 + } 1.561 + MemRegion region_stack_pop() { return _regionStack.pop(); } 1.562 + int region_stack_size() { return _regionStack.size(); } 1.563 + bool region_stack_overflow() { return _regionStack.overflow(); } 1.564 + bool region_stack_empty() { return _regionStack.isEmpty(); } 1.565 + 1.566 + bool concurrent_marking_in_progress() { 1.567 + return _concurrent_marking_in_progress; 1.568 + } 1.569 + void set_concurrent_marking_in_progress() { 1.570 + _concurrent_marking_in_progress = true; 1.571 + } 1.572 + void clear_concurrent_marking_in_progress() { 1.573 + _concurrent_marking_in_progress = false; 1.574 + } 1.575 + 1.576 + void update_accum_task_vtime(int i, double vtime) { 1.577 + _accum_task_vtime[i] += vtime; 1.578 + } 1.579 + 1.580 + double all_task_accum_vtime() { 1.581 + double ret = 0.0; 1.582 + for (int i = 0; i < (int)_max_task_num; ++i) 1.583 + ret += _accum_task_vtime[i]; 1.584 + return ret; 1.585 + } 1.586 
+ 1.587 + // Attempts to steal an object from the task queues of other tasks 1.588 + bool try_stealing(int task_num, int* hash_seed, oop& obj) { 1.589 + return _task_queues->steal(task_num, hash_seed, obj); 1.590 + } 1.591 + 1.592 + // It grays an object by first marking it. Then, if it's behind the 1.593 + // global finger, it also pushes it on the global stack. 1.594 + void deal_with_reference(oop obj); 1.595 + 1.596 + ConcurrentMark(ReservedSpace rs, int max_regions); 1.597 + ~ConcurrentMark(); 1.598 + ConcurrentMarkThread* cmThread() { return _cmThread; } 1.599 + 1.600 + CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; } 1.601 + CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; } 1.602 + 1.603 + // The following three are interaction between CM and 1.604 + // G1CollectedHeap 1.605 + 1.606 + // This notifies CM that a root during initial-mark needs to be 1.607 + // grayed and it's MT-safe. Currently, we just mark it. But, in the 1.608 + // future, we can experiment with pushing it on the stack and we can 1.609 + // do this without changing G1CollectedHeap. 1.610 + void grayRoot(oop p); 1.611 + // It's used during evacuation pauses to gray a region, if 1.612 + // necessary, and it's MT-safe. It assumes that the caller has 1.613 + // marked any objects on that region. If _should_gray_objects is 1.614 + // true and we're still doing concurrent marking, the region is 1.615 + // pushed on the region stack, if it is located below the global 1.616 + // finger, otherwise we do nothing. 1.617 + void grayRegionIfNecessary(MemRegion mr); 1.618 + // It's used during evacuation pauses to mark and, if necessary, 1.619 + // gray a single object and it's MT-safe. It assumes the caller did 1.620 + // not mark the object. If _should_gray_objects is true and we're 1.621 + // still doing concurrent marking, the objects is pushed on the 1.622 + // global stack, if it is located below the global finger, otherwise 1.623 + // we do nothing. 
1.624 + void markAndGrayObjectIfNecessary(oop p); 1.625 + 1.626 + // This iterates over the bitmap of the previous marking and prints 1.627 + // out all objects that are marked on the bitmap and indicates 1.628 + // whether what they point to is also marked or not. 1.629 + void print_prev_bitmap_reachable(); 1.630 + 1.631 + // Clear the next marking bitmap (will be called concurrently). 1.632 + void clearNextBitmap(); 1.633 + 1.634 + // main CMS steps and related support 1.635 + void checkpointRootsInitial(); 1.636 + 1.637 + // These two do the work that needs to be done before and after the 1.638 + // initial root checkpoint. Since this checkpoint can be done at two 1.639 + // different points (i.e. an explicit pause or piggy-backed on a 1.640 + // young collection), then it's nice to be able to easily share the 1.641 + // pre/post code. It might be the case that we can put everything in 1.642 + // the post method. TP 1.643 + void checkpointRootsInitialPre(); 1.644 + void checkpointRootsInitialPost(); 1.645 + 1.646 + // Do concurrent phase of marking, to a tentative transitive closure. 1.647 + void markFromRoots(); 1.648 + 1.649 + // Process all unprocessed SATB buffers. It is called at the 1.650 + // beginning of an evacuation pause. 1.651 + void drainAllSATBBuffers(); 1.652 + 1.653 + void checkpointRootsFinal(bool clear_all_soft_refs); 1.654 + void checkpointRootsFinalWork(); 1.655 + void calcDesiredRegions(); 1.656 + void cleanup(); 1.657 + void completeCleanup(); 1.658 + 1.659 + // Mark in the previous bitmap. NB: this is usually read-only, so use 1.660 + // this carefully! 1.661 + void markPrev(oop p); 1.662 + void clear(oop p); 1.663 + // Clears marks for all objects in the given range, for both prev and 1.664 + // next bitmaps. NB: the previous bitmap is usually read-only, so use 1.665 + // this carefully! 
1.666 + void clearRangeBothMaps(MemRegion mr); 1.667 + 1.668 + // Record the current top of the mark and region stacks; a 1.669 + // subsequent oops_do() on the mark stack and 1.670 + // invalidate_entries_into_cset() on the region stack will iterate 1.671 + // only over indices valid at the time of this call. 1.672 + void set_oops_do_bound() { 1.673 + _markStack.set_oops_do_bound(); 1.674 + _regionStack.set_oops_do_bound(); 1.675 + } 1.676 + // Iterate over the oops in the mark stack and all local queues. It 1.677 + // also calls invalidate_entries_into_cset() on the region stack. 1.678 + void oops_do(OopClosure* f); 1.679 + // It is called at the end of an evacuation pause during marking so 1.680 + // that CM is notified of where the new end of the heap is. It 1.681 + // doesn't do anything if concurrent_marking_in_progress() is false, 1.682 + // unless the force parameter is true. 1.683 + void update_g1_committed(bool force = false); 1.684 + 1.685 + void complete_marking_in_collection_set(); 1.686 + 1.687 + // It indicates that a new collection set is being chosen. 1.688 + void newCSet(); 1.689 + // It registers a collection set heap region with CM. This is used 1.690 + // to determine whether any heap regions are located above the finger. 1.691 + void registerCSetRegion(HeapRegion* hr); 1.692 + 1.693 + // Returns "true" if at least one mark has been completed. 
1.694 + bool at_least_one_mark_complete() { return _at_least_one_mark_complete; } 1.695 + 1.696 + bool isMarked(oop p) const { 1.697 + assert(p != NULL && p->is_oop(), "expected an oop"); 1.698 + HeapWord* addr = (HeapWord*)p; 1.699 + assert(addr >= _nextMarkBitMap->startWord() || 1.700 + addr < _nextMarkBitMap->endWord(), "in a region"); 1.701 + 1.702 + return _nextMarkBitMap->isMarked(addr); 1.703 + } 1.704 + 1.705 + inline bool not_yet_marked(oop p) const; 1.706 + 1.707 + // XXX Debug code 1.708 + bool containing_card_is_marked(void* p); 1.709 + bool containing_cards_are_marked(void* start, void* last); 1.710 + 1.711 + bool isPrevMarked(oop p) const { 1.712 + assert(p != NULL && p->is_oop(), "expected an oop"); 1.713 + HeapWord* addr = (HeapWord*)p; 1.714 + assert(addr >= _prevMarkBitMap->startWord() || 1.715 + addr < _prevMarkBitMap->endWord(), "in a region"); 1.716 + 1.717 + return _prevMarkBitMap->isMarked(addr); 1.718 + } 1.719 + 1.720 + inline bool do_yield_check(int worker_i = 0); 1.721 + inline bool should_yield(); 1.722 + 1.723 + // Called to abort the marking cycle after a Full GC takes palce. 1.724 + void abort(); 1.725 + 1.726 + void disable_co_trackers(); 1.727 + 1.728 + // This prints the global/local fingers. It is used for debugging. 1.729 + NOT_PRODUCT(void print_finger();) 1.730 + 1.731 + void print_summary_info(); 1.732 + 1.733 + // The following indicate whether a given verbose level has been 1.734 + // set. 
Notice that anything above stats is conditional to 1.735 + // _MARKING_VERBOSE_ having been set to 1 1.736 + bool verbose_stats() 1.737 + { return _verbose_level >= stats_verbose; } 1.738 + bool verbose_low() 1.739 + { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; } 1.740 + bool verbose_medium() 1.741 + { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; } 1.742 + bool verbose_high() 1.743 + { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; } 1.744 +}; 1.745 + 1.746 +// A class representing a marking task. 1.747 +class CMTask : public TerminatorTerminator { 1.748 +private: 1.749 + enum PrivateConstants { 1.750 + // the regular clock call is called once the scanned words reaches 1.751 + // this limit 1.752 + words_scanned_period = 12*1024, 1.753 + // the regular clock call is called once the number of visited 1.754 + // references reaches this limit 1.755 + refs_reached_period = 384, 1.756 + // initial value for the hash seed, used in the work stealing code 1.757 + init_hash_seed = 17, 1.758 + // how many entries will be transferred between global stack and 1.759 + // local queues 1.760 + global_stack_transfer_size = 16 1.761 + }; 1.762 + 1.763 + int _task_id; 1.764 + G1CollectedHeap* _g1h; 1.765 + ConcurrentMark* _cm; 1.766 + CMBitMap* _nextMarkBitMap; 1.767 + // the task queue of this task 1.768 + CMTaskQueue* _task_queue; 1.769 + // the task queue set---needed for stealing 1.770 + CMTaskQueueSet* _task_queues; 1.771 + // indicates whether the task has been claimed---this is only for 1.772 + // debugging purposes 1.773 + bool _claimed; 1.774 + 1.775 + // number of calls to this task 1.776 + int _calls; 1.777 + 1.778 + // concurrent overhead over a single CPU for this task 1.779 + COTracker _co_tracker; 1.780 + 1.781 + // when the virtual timer reaches this time, the marking step should 1.782 + // exit 1.783 + double _time_target_ms; 1.784 + // the start time of the current marking step 1.785 + double _start_time_ms; 1.786 + 
  // the oop closure used for iterations over oops
  OopClosure* _oop_closure;

  // the region this task is scanning, NULL if we're not scanning any
  HeapRegion* _curr_region;
  // the local finger of this task, NULL if we're not scanning a region
  HeapWord* _finger;
  // limit of the region this task is scanning, NULL if we're not scanning one
  HeapWord* _region_limit;

  // This is used only when we scan regions popped from the region
  // stack. It records what the last object on such a region we
  // scanned was. It is used to ensure that, if we abort region
  // iteration, we do not rescan the first part of the region. This
  // should be NULL when we're not scanning a region from the region
  // stack.
  HeapWord* _region_finger;

  // the number of words this task has scanned
  size_t _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t _words_scanned_limit;
  // the initial value of _words_scanned_limit (i.e. what it was
  // before it was decreased).
  size_t _real_words_scanned_limit;

  // the number of references this task has visited
  size_t _refs_reached;
  // When _refs_reached reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t _refs_reached_limit;
  // the initial value of _refs_reached_limit (i.e. what it was before
  // it was decreased).
  size_t _real_refs_reached_limit;

  // used by the work stealing stuff
  int _hash_seed;
  // if this is true, then the task has aborted for some reason
  bool _has_aborted;
  // set when the task aborts because it has met its time quota
  bool _has_aborted_timed_out;
  // true when we're draining SATB buffers; this avoids the task
  // aborting due to SATB buffers being available (as we're already
  // dealing with them)
  bool _draining_satb_buffers;

  // number sequence of past step times
  NumberSeq _step_times_ms;
  // elapsed time of this task
  double _elapsed_time_ms;
  // termination time of this task
  double _termination_time_ms;
  // when this task got into the termination protocol
  double _termination_start_time_ms;

  // true when the task is during a concurrent phase, false when it is
  // in the remark phase (so, in the latter case, we do not have to
  // check all the things that we have to check during the concurrent
  // phase, i.e. SATB buffer availability...)
  bool _concurrent;

  // recent durations of marking steps, used to predict step times
  TruncatedSeq _marking_step_diffs_ms;

  // LOTS of statistics related with this task
#if _MARKING_STATS_
  NumberSeq _all_clock_intervals_ms;
  double _interval_start_time_ms;

  // counts of the various reasons a marking step aborted
  int _aborted;
  int _aborted_overflow;
  int _aborted_cm_aborted;
  int _aborted_yield;
  int _aborted_timed_out;
  int _aborted_satb;
  int _aborted_termination;

  int _steal_attempts;
  int _steals;

  int _clock_due_to_marking;
  int _clock_due_to_scanning;

  int _local_pushes;
  int _local_pops;
  int _local_max_size;
  int _objs_scanned;

  int _global_pushes;
  int _global_pops;
  int _global_max_size;

  int _global_transfers_to;
  int _global_transfers_from;

  int _region_stack_pops;

  int _regions_claimed;
  int _objs_found_on_bitmap;

  int _satb_buffers_processed;
#endif // _MARKING_STATS_

  // it updates the local fields after this task has claimed
  // a new region to scan
  void setup_for_region(HeapRegion* hr);
  // it brings up-to-date the limit of the region
  void update_region_limit();
  // it resets the local fields after a task has finished scanning a
  // region
  void giveup_current_region();

  // called when either the words scanned or the refs visited limit
  // has been reached
  void reached_limit();
  // recalculates the words scanned and refs visited limits
  void recalculate_limits();
  // decreases the words scanned and refs visited limits when we reach
  // an expensive operation
  void decrease_limits();
  // it checks whether the words scanned or refs visited reached their
  // respective limit and calls reached_limit() if they have
  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit)
      reached_limit();
  }
  // this is supposed to be called regularly during a marking step as
  // it checks a bunch of conditions that might cause the marking step
  // to abort
  void regular_clock_call();
  bool concurrent() { return _concurrent; }

public:
  // It resets the task; it should be called right at the beginning of
  // a marking phase.
  void reset(CMBitMap* _nextMarkBitMap);
  // it clears all the fields that correspond to a claimed region.
  void clear_region_fields();

  void set_concurrent(bool concurrent) { _concurrent = concurrent; }

  // The co-tracker must be disabled before it is enabled, and vice
  // versa; the guarantees below enforce that protocol.
  void enable_co_tracker() {
    guarantee( !_co_tracker.enabled(), "invariant" );
    _co_tracker.enable();
  }
  void disable_co_tracker() {
    guarantee( _co_tracker.enabled(), "invariant" );
    _co_tracker.disable();
  }
  bool co_tracker_enabled() {
    return _co_tracker.enabled();
  }
  void reset_co_tracker(double starting_conc_overhead = 0.0) {
    _co_tracker.reset(starting_conc_overhead);
  }
  void start_co_tracker() {
    _co_tracker.start();
  }
  void update_co_tracker(bool force_end = false) {
    _co_tracker.update(force_end);
  }

  // The main method of this class which performs a marking step
  // trying not to exceed the given duration. However, it might exit
  // prematurely, according to some conditions (i.e. SATB buffers are
  // available for processing).
  void do_marking_step(double target_ms);

  // These two calls start and stop the timer: record_start_time()
  // stashes the current time in _elapsed_time_ms and record_end_time()
  // replaces it with the difference, i.e. the elapsed duration in ms.
  void record_start_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0;
  }
  void record_end_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
  }

  // returns the task ID
  int task_id() { return _task_id; }

  // From TerminatorTerminator. It determines whether this task should
  // exit the termination protocol after it's entered it.
  virtual bool should_exit_termination();

  // the local finger of this task (see _finger)
  HeapWord* finger() { return _finger; }

  // abort-flag accessors (see _has_aborted)
  bool has_aborted() { return _has_aborted; }
  void set_has_aborted() { _has_aborted = true; }
  void clear_has_aborted() { _has_aborted = false; }
  bool claimed() { return _claimed; }

  void set_oop_closure(OopClosure* oop_closure) {
    _oop_closure = oop_closure;
  }

  // It grays the object by marking it and, if necessary, pushing it
  // on the local queue
  void deal_with_reference(oop obj);

  // It scans an object and visits its children. The object must
  // already be marked in the next bitmap (checked by the guarantee).
  void scan_object(oop obj) {
    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
                      "invariant" );

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                             _task_id, (void*) obj);

    size_t obj_size = obj->size();
    _words_scanned += obj_size;

    obj->oop_iterate(_oop_closure);
    statsOnly( ++_objs_scanned );
    // scanning counts towards the words-scanned limit; this may
    // trigger the regular clock via reached_limit()
    check_limits();
  }

  // It pushes an object on the local queue.
  void push(oop obj);

  // These two move entries to/from the global stack.
  void move_entries_to_global_stack();
  void get_entries_from_global_stack();

  // It pops and scans objects from the local queue. If partially is
  // true, then it stops when the queue size is of a given limit. If
  // partially is false, then it stops when the queue is empty.
  void drain_local_queue(bool partially);
  // It moves entries from the global stack to the local queue and
  // drains the local queue. If partially is true, then it stops when
  // both the global stack and the local queue reach a given size. If
  // partially is false, it tries to empty them totally.
  void drain_global_stack(bool partially);
  // It keeps picking SATB buffers and processing them until no SATB
  // buffers are available.
  void drain_satb_buffers();
  // It keeps popping regions from the region stack and processing
  // them until the region stack is empty.
  void drain_region_stack(BitMapClosure* closure);

  // moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
                      "invariant" );
    _finger = new_finger;
  }

  // moves the region finger to a new location
  inline void move_region_finger_to(HeapWord* new_finger) {
    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
    _region_finger = new_finger;
  }

  CMTask(int task_num, ConcurrentMark *cm,
         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);

  // it prints statistics associated with this task
  void print_stats();

#if _MARKING_STATS_
  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
#endif // _MARKING_STATS_
};