Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
1 /*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
28 #include "gc_implementation/g1/g1CodeCacheRemSet.hpp"
29 #include "gc_implementation/g1/sparsePRT.hpp"
31 // Remembered set for a heap region. Represent a set of "cards" that
32 // contain pointers into the owner heap region. Cards are defined somewhat
33 // abstractly, in terms of what the "BlockOffsetTable" in use can parse.
35 class G1CollectedHeap;
36 class G1BlockOffsetSharedArray;
37 class HeapRegion;
38 class HeapRegionRemSetIterator;
39 class PerRegionTable;
40 class SparsePRT;
41 class nmethod;
43 // Essentially a wrapper around SparsePRTCleanupTask. See
44 // sparsePRT.hpp for more details.
// Instances are passed to the do_cleanup_work() methods declared further
// below to carry per-worker state during remembered set cleanup.
45 class HRRSCleanupTask : public SparsePRTCleanupTask {
46 };
48 // The FromCardCache remembers the most recently processed card on the heap on
49 // a per-region and per-thread basis.
50 class FromCardCache : public AllStatic {
51 private:
52 // Array of card indices. Indexed by thread X and heap region to minimize
53 // thread contention.
54 static int** _cache;
// Number of regions covered by the cache (the second dimension of _cache).
55 static uint _max_regions;
// Bytes consumed by the cache itself; reported by static_mem_size().
56 static size_t _static_mem_size;
58 public:
59 enum {
60 InvalidCard = -1 // Card value of an invalid card, i.e. a card index not otherwise used.
61 };
// Clear the cached card values for the given region.
63 static void clear(uint region_idx);
65 // Returns true if the given card is in the cache at the given location, or
66 // replaces the card at that location and returns false.
67 static bool contains_or_replace(uint worker_id, uint region_idx, int card) {
68 int card_in_cache = at(worker_id, region_idx);
69 if (card_in_cache == card) {
70 return true;
71 } else {
72 set(worker_id, region_idx, card);
73 return false;
74 }
75 }
// Raw accessor: the cached card for the given worker/region pair.
77 static int at(uint worker_id, uint region_idx) {
78 return _cache[worker_id][region_idx];
79 }
// Raw mutator: store "val" as the cached card for the given worker/region pair.
81 static void set(uint worker_id, uint region_idx, int val) {
82 _cache[worker_id][region_idx] = val;
83 }
// Allocate the cache sized for n_par_rs threads by max_num_regions regions.
85 static void initialize(uint n_par_rs, uint max_num_regions);
// Invalidate cached entries for regions in [start_idx, start_idx + num_regions).
87 static void invalidate(uint start_idx, size_t num_regions);
89 static void print(outputStream* out = gclog_or_tty) PRODUCT_RETURN;
// Memory, in bytes, used by the cache's own data structures.
91 static size_t static_mem_size() {
92 return _static_mem_size;
93 }
94 };
96 // The "_coarse_map" is a bitmap with one bit for each region, where set
97 // bits indicate that the corresponding region may contain some pointer
98 // into the owning region.
100 // The "_fine_grain_entries" array is an open hash table of PerRegionTables
101 // (PRTs), indicating regions for which we're keeping the RS as a set of
102 // cards. The strategy is to cap the size of the fine-grain table,
103 // deleting an entry and setting the corresponding coarse-grained bit when
104 // we would overflow this cap.
106 // We use a mixture of locking and lock-free techniques here. We allow
107 // threads to locate PRTs without locking, but threads attempting to alter
108 // a bucket list obtain a lock. This means that any failing attempt to
109 // find a PRT must be retried with the lock. It might seem dangerous that
110 // a read can find a PRT that is concurrently deleted. This is all right,
111 // because:
112 //
113 // 1) We only actually free PRT's at safe points (though we reuse them at
114 // other times).
115 // 2) We find PRT's in an attempt to add entries. If a PRT is deleted,
116 // its _coarse_map bit is set, so that the card we were attempting to add
117 // is represented. If a deleted PRT is re-used, a thread adding a bit,
118 // thinking the PRT is for a different region, does no harm.
120 class OtherRegionsTable VALUE_OBJ_CLASS_SPEC {
121 friend class HeapRegionRemSetIterator;
123 G1CollectedHeap* _g1h;
// Lock guarding the mutable structures below (see "protected by _m" note).
124 Mutex* _m;
// The heap region that owns this remembered set.
125 HeapRegion* _hr;
127 // These are protected by "_m".
128 BitMap _coarse_map;
// Number of bits currently set in _coarse_map.
129 size_t _n_coarse_entries;
// Global count of fine-to-coarse transitions across all tables.
130 static jint _n_coarsenings;
// The open hash table of PRTs described in the class comment above.
132 PerRegionTable** _fine_grain_regions;
// Number of entries currently in _fine_grain_regions.
133 size_t _n_fine_entries;
135 // The fine grain remembered sets are doubly linked together using
136 // their 'next' and 'prev' fields.
137 // This allows fast bulk freeing of all the fine grain remembered
138 // set entries, and fast finding of all of them without iterating
139 // over the _fine_grain_regions table.
140 PerRegionTable * _first_all_fine_prts;
141 PerRegionTable * _last_all_fine_prts;
143 // Used to sample a subset of the fine grain PRTs to determine which
144 // PRT to evict and coarsen.
145 size_t _fine_eviction_start;
146 static size_t _fine_eviction_stride;
147 static size_t _fine_eviction_sample_size;
// Sparse per-region card representation; see sparsePRT.hpp.
149 SparsePRT _sparse_table;
151 // These are static after init.
152 static size_t _max_fine_entries;
153 static size_t _mod_max_fine_entries_mask;
155 // Requires "prt" to be the first element of the bucket list appropriate
156 // for "hr". If this list contains an entry for "hr", return it,
157 // otherwise return "NULL".
158 PerRegionTable* find_region_table(size_t ind, HeapRegion* hr) const;
160 // Find, delete, and return a candidate PerRegionTable, if any exists,
161 // adding the deleted region to the coarse bitmap. Requires the caller
162 // to hold _m, and the fine-grain table to be full.
163 PerRegionTable* delete_region_table();
165 // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
166 // be the correct index for "hr"), delete it and return true; else return
167 // false.
168 bool del_single_region_table(size_t ind, HeapRegion* hr);
170 // link/add the given fine grain remembered set into the "all" list
171 void link_to_all(PerRegionTable * prt);
// unlink/remove the given fine grain remembered set from the "all" list
173 void unlink_from_all(PerRegionTable * prt);
175 public:
176 OtherRegionsTable(HeapRegion* hr, Mutex* m);
// The region that owns this remembered set.
178 HeapRegion* hr() const { return _hr; }
180 // For now. Could "expand" some tables in the future, so that this made
181 // sense.
182 void add_reference(OopOrNarrowOopStar from, int tid);
184 // Removes any entries shown by the given bitmaps to contain only dead
185 // objects.
186 void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
188 // Returns whether this remembered set (and all sub-sets) contain no entries.
189 bool is_empty() const;
// Card counts: total, and broken down per representation (fine/coarse/sparse).
191 size_t occupied() const;
192 size_t occ_fine() const;
193 size_t occ_coarse() const;
194 size_t occ_sparse() const;
196 static jint n_coarsenings() { return _n_coarsenings; }
198 // Returns size in bytes.
// NOTE(review): a previous comment claimed "not const because it takes a
// lock", which contradicts the const declaration below. Locking appears to
// be done by the caller (see HeapRegionRemSet::mem_size()) -- confirm
// against the implementation.
200 size_t mem_size() const;
// Bytes used by static (shared) data, and by the free lists, respectively.
201 static size_t static_mem_size();
202 static size_t fl_mem_size();
204 bool contains_reference(OopOrNarrowOopStar from) const;
205 bool contains_reference_locked(OopOrNarrowOopStar from) const;
// Remove all entries from this remembered set.
207 void clear();
209 // Specifically clear the from_card_cache.
210 void clear_fcc();
212 void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
214 // Declare the heap size (in # of regions) to the OtherRegionsTable.
215 // (Uses it to initialize from_card_cache).
216 static void initialize(uint max_regions);
218 // Declares that regions between start_idx <= i < start_idx + num_regions are
219 // not in use. Make sure that any entries for these regions are invalid.
220 static void invalidate(uint start_idx, size_t num_regions);
222 static void print_from_card_cache();
223 };
// The remembered set of a single heap region: incoming heap references are
// tracked by the embedded OtherRegionsTable, and nmethods whose code points
// into the region are tracked by the strong code root set (_code_roots).
225 class HeapRegionRemSet : public CHeapObj<mtGC> {
226 friend class VMStructs;
227 friend class HeapRegionRemSetIterator;
229 public:
// Event kinds that can be recorded via record_event() (debugging aid).
230 enum Event {
231 Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd
232 };
234 private:
// Block offset table shared array for the heap; cached by the iterator.
235 G1BlockOffsetSharedArray* _bosa;
236 G1BlockOffsetSharedArray* bosa() const { return _bosa; }
238 // A set of code blobs (nmethods) whose code contains pointers into
239 // the region that owns this RSet.
240 G1CodeRootSet _code_roots;
// Lock taken by occupied() and mem_size() below before touching _other_regions.
242 Mutex _m;
// The card-based part of this remembered set.
244 OtherRegionsTable _other_regions;
// State for claiming this remembered set during parallel iteration.
246 enum ParIterState { Unclaimed, Claimed, Complete };
247 volatile ParIterState _iter_state;
// Number of cards already claimed during parallel iteration.
248 volatile jlong _iter_claimed;
250 // Unused unless G1RecordHRRSOops is true.
252 static const int MaxRecorded = 1000000;
253 static OopOrNarrowOopStar* _recorded_oops;
254 static HeapWord** _recorded_cards;
255 static HeapRegion** _recorded_regions;
256 static int _n_recorded;
258 static const int MaxRecordedEvents = 1000;
259 static Event* _recorded_events;
260 static int* _recorded_event_index;
261 static int _n_recorded_events;
263 static void print_event(outputStream* str, Event evnt);
265 public:
266 HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr);
268 static uint num_par_rem_sets();
269 static void setup_remset_size();
// The region that owns this remembered set.
271 HeapRegion* hr() const {
272 return _other_regions.hr();
273 }
// True iff both the code root set and the card-based set are empty.
275 bool is_empty() const {
276 return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
277 }
// Number of cards in this remembered set; takes _m, then delegates.
279 size_t occupied() {
280 MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
281 return occupied_locked();
282 }
283 size_t occupied_locked() {
284 return _other_regions.occupied();
285 }
// Per-representation card counts, forwarded to the OtherRegionsTable.
286 size_t occ_fine() const {
287 return _other_regions.occ_fine();
288 }
289 size_t occ_coarse() const {
290 return _other_regions.occ_coarse();
291 }
292 size_t occ_sparse() const {
293 return _other_regions.occ_sparse();
294 }
296 static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
298 // Used in the sequential case.
299 void add_reference(OopOrNarrowOopStar from) {
300 _other_regions.add_reference(from, 0);
301 }
303 // Used in the parallel case.
304 void add_reference(OopOrNarrowOopStar from, int tid) {
305 _other_regions.add_reference(from, tid);
306 }
308 // Removes any entries shown by the given bitmaps to contain only dead
309 // objects.
310 void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
312 // The region is being reclaimed; clear its remset, and any mention of
313 // entries for this region in other remsets.
314 void clear();
315 void clear_locked();
317 // Attempt to claim the region. Returns true iff this call caused an
318 // atomic transition from Unclaimed to Claimed.
319 bool claim_iter();
320 // Sets the iteration state to "complete".
321 void set_iter_complete();
322 // Returns "true" iff the region's iteration is complete.
323 bool iter_is_complete();
325 // Support for claiming blocks of cards during iteration
326 size_t iter_claimed() const { return (size_t)_iter_claimed; }
327 // Claim the next block of cards
// Atomically advances _iter_claimed by "step" using a CAS retry loop and
// returns the previous value, i.e. the start of the block just claimed.
328 size_t iter_claimed_next(size_t step) {
329 size_t current, next;
330 do {
331 current = iter_claimed();
332 next = current + step;
333 } while (Atomic::cmpxchg((jlong)next, &_iter_claimed, (jlong)current) != (jlong)current);
334 return current;
335 }
336 void reset_for_par_iteration();
// Sanity check: no iteration may be in progress.
338 bool verify_ready_for_par_iteration() {
339 return (_iter_state == Unclaimed) && (_iter_claimed == 0);
340 }
342 // The actual # of bytes this hr_remset takes up.
343 // Note also includes the strong code root set.
344 size_t mem_size() {
345 MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
346 return _other_regions.mem_size()
347 // This correction is necessary because the above includes the second
348 // part.
349 + (sizeof(HeapRegionRemSet) - sizeof(OtherRegionsTable))
350 + strong_code_roots_mem_size();
351 }
353 // Returns the memory occupancy of all static data structures associated
354 // with remembered sets.
355 static size_t static_mem_size() {
356 return OtherRegionsTable::static_mem_size() + G1CodeRootSet::static_mem_size();
357 }
359 // Returns the memory occupancy of all free_list data structures associated
360 // with remembered sets.
361 static size_t fl_mem_size() {
362 return OtherRegionsTable::fl_mem_size();
363 }
365 bool contains_reference(OopOrNarrowOopStar from) const {
366 return _other_regions.contains_reference(from);
367 }
369 // Routines for managing the list of code roots that point into
370 // the heap region that owns this RSet.
371 void add_strong_code_root(nmethod* nm);
372 void add_strong_code_root_locked(nmethod* nm);
373 void remove_strong_code_root(nmethod* nm);
375 // Applies blk->do_code_blob() to each of the entries in
376 // the strong code roots list
377 void strong_code_roots_do(CodeBlobClosure* blk) const;
379 void clean_strong_code_roots(HeapRegion* hr);
381 // Returns the number of elements in the strong code roots list
382 size_t strong_code_roots_list_length() const {
383 return _code_roots.length();
384 }
386 // Returns true if the strong code roots contains the given
387 // nmethod.
388 bool strong_code_roots_list_contains(nmethod* nm) {
389 return _code_roots.contains(nm);
390 }
392 // Returns the amount of memory, in bytes, currently
393 // consumed by the strong code roots.
394 size_t strong_code_roots_mem_size();
396 void print() PRODUCT_RETURN;
398 // Called during a stop-world phase to perform any deferred cleanups.
399 static void cleanup();
401 // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
402 // (Uses it to initialize from_card_cache).
403 static void init_heap(uint max_regions) {
404 OtherRegionsTable::initialize(max_regions);
405 }
// Forwards to OtherRegionsTable::invalidate() for the given region range.
407 static void invalidate(uint start_idx, uint num_regions) {
408 OtherRegionsTable::invalidate(start_idx, num_regions);
409 }
411 #ifndef PRODUCT
412 static void print_from_card_cache() {
413 OtherRegionsTable::print_from_card_cache();
414 }
415 #endif
// Debugging support, active only when G1RecordHRRSOops is true (see above).
417 static void record(HeapRegion* hr, OopOrNarrowOopStar f);
418 static void print_recorded();
419 static void record_event(Event evnt);
421 // These are wrappers for the similarly-named methods on
422 // SparsePRT. Look at sparsePRT.hpp for more details.
423 static void reset_for_cleanup_tasks();
424 void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
425 static void finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task);
427 // Run unit tests.
428 #ifndef PRODUCT
429 static void test_prt();
430 static void test();
431 #endif
432 };
// Iterator over all card indices contained in a HeapRegionRemSet. Cards are
// yielded from the sparse table first, then the fine-grain PRTs, and finally
// the coarse bitmap (see IterState below).
434 class HeapRegionRemSetIterator : public StackObj {
435 private:
436 // The region RSet over which we are iterating.
437 HeapRegionRemSet* _hrrs;
439 // Local caching of HRRS fields.
440 const BitMap* _coarse_map;
442 G1BlockOffsetSharedArray* _bosa;
443 G1CollectedHeap* _g1h;
445 // The number of cards yielded since initialization.
446 size_t _n_yielded_fine;
447 size_t _n_yielded_coarse;
448 size_t _n_yielded_sparse;
450 // Indicates what granularity of table that we are currently iterating over.
451 // We start iterating over the sparse table, progress to the fine grain
452 // table, and then finish with the coarse table.
453 enum IterState {
454 Sparse,
455 Fine,
456 Coarse
457 };
458 IterState _is;
460 // For both Coarse and Fine remembered set iteration this contains the
461 // first card number of the heap region we currently iterate over.
462 size_t _cur_region_card_offset;
464 // Current region index for the Coarse remembered set iteration.
465 int _coarse_cur_region_index;
466 size_t _coarse_cur_region_cur_card;
// Advance within the coarse bitmap; on success sets "card_index" and
// returns true, otherwise returns false (coarse iteration exhausted).
468 bool coarse_has_next(size_t& card_index);
470 // The PRT we are currently iterating over.
471 PerRegionTable* _fine_cur_prt;
472 // Card offset within the current PRT.
473 size_t _cur_card_in_prt;
475 // Update internal variables when switching to the given PRT.
476 void switch_to_prt(PerRegionTable* prt);
// Advance within the fine-grain PRTs; the second overload sets "card_index"
// on success. Both return false when fine iteration is exhausted.
477 bool fine_has_next();
478 bool fine_has_next(size_t& card_index);
480 // The Sparse remembered set iterator.
481 SparsePRTIter _sparse_iter;
483 public:
484 HeapRegionRemSetIterator(HeapRegionRemSet* hrrs);
486 // If there remains one or more cards to be yielded, returns true and
487 // sets "card_index" to one of those cards (which is then considered
488 // yielded.) Otherwise, returns false (and leaves "card_index"
489 // undefined.)
490 bool has_next(size_t& card_index);
// Counts of cards yielded so far, per representation and in total.
492 size_t n_yielded_fine() { return _n_yielded_fine; }
493 size_t n_yielded_coarse() { return _n_yielded_coarse; }
494 size_t n_yielded_sparse() { return _n_yielded_sparse; }
495 size_t n_yielded() {
496 return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse();
497 }
498 };
500 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP