Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
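The essence of the fix, as a minimal sketch (the function and parameter names below are hypothetical illustrations, not the actual patch): only write to the backing memory when the fill value differs from what the OS already guarantees for freshly committed pages.

#include <string.h>

// Hypothetical illustration of the fix described in the summary above.
static void initialize_committed_range(void* base, size_t size, char init_value) {
  if (init_value != 0) {
    // Touching the pages forces the OS to back them with physical memory.
    memset(base, init_value, size);
  }
  // For init_value == 0 we deliberately do nothing: freshly committed
  // pages already read as zero, so leaving them untouched keeps the
  // footprint low until the pages are actually used.
}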
/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/collectionSetChooser.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "memory/space.inline.hpp"

// Even though we don't use the GC efficiency in our heuristics as
// much as we used to, we still order according to GC efficiency. This
// will cause regions with a lot of live objects and large RSets to
// end up at the end of the array. Given that we might skip collecting
// the last few old regions, if after a few mixed GCs the remaining
// regions have reclaimable bytes under a certain threshold, the hope is
// that the ones we'll skip are ones with both large RSets and a lot of
// live objects, not the ones with just a lot of live objects if we
// ordered according to the amount of reclaimable bytes per region.
// Order regions by decreasing GC efficiency; a NULL entry compares
// greater than any region, so NULL slots end up at the tail of the array.
static int order_regions(HeapRegion* hr1, HeapRegion* hr2) {
  if (hr1 == NULL) {
    if (hr2 == NULL) {
      return 0;
    } else {
      return 1;
    }
  } else if (hr2 == NULL) {
    return -1;
  }

  double gc_eff1 = hr1->gc_efficiency();
  double gc_eff2 = hr2->gc_efficiency();
  if (gc_eff1 > gc_eff2) {
    return -1;
  } else if (gc_eff1 < gc_eff2) {
    return 1;
  } else {
    return 0;
  }
}

static int order_regions(HeapRegion** hr1p, HeapRegion** hr2p) {
  return order_regions(*hr1p, *hr2p);
}

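// Usage note (not part of the original file): the comparator follows the
// qsort convention, so for two regions a and b:
//   order_regions(a, b) == -1  =>  a sorts before b (a is more GC-efficient)
//   order_regions(a, b) ==  1  =>  a sorts after b
// and, because NULL compares greater than any region, sorting pushes all
// NULL slots past the live entries to the tail of the array.
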
CollectionSetChooser::CollectionSetChooser() :
  // The line below is the worst bit of C++ hackery I've ever written
  // (Detlefs, 11/23). You should think of it as equivalent to
  // "_regions(100, true)": initialize the growable array and inform it
  // that it should allocate its elem array(s) on the C heap.
  //
  // The first argument, however, is actually a comma expression
  // (set_allocation_type(this, C_HEAP), 100). The purpose of the
  // set_allocation_type() call is to replace the default allocation
  // type for embedded objects STACK_OR_EMBEDDED with C_HEAP. It allows
  // us to pass the assert in GenericGrowableArray(), which checks
  // that a growable array object must be on the C heap if its elements are.
  //
  // Note: the containing object is allocated on the C heap since it is CHeapObj.
  //
  _regions((ResourceObj::set_allocation_type((address) &_regions,
                                             ResourceObj::C_HEAP),
            100), true /* C_Heap */),
    _curr_index(0), _length(0), _first_par_unreserved_idx(0),
    _region_live_threshold_bytes(0), _remaining_reclaimable_bytes(0) {
  _region_live_threshold_bytes =
    HeapRegion::GrainBytes * (size_t) G1MixedGCLiveThresholdPercent / 100;
}

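// A standalone sketch of the comma-expression trick used in the
// initializer list above (Tracked and Holder are hypothetical types, not
// part of HotSpot): the first operand runs purely for its side effect,
// and the second operand's value is what the member constructor receives.
struct Tracked {
  int _v;
  explicit Tracked(int v) : _v(v) {}
};

struct Holder {
  Tracked _t;
  static void note_allocation_type(Holder*) { /* side effect only */ }
  // note_allocation_type() runs first; _t is then constructed with 100.
  Holder() : _t((note_allocation_type(this), 100)) {}
};
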
#ifndef PRODUCT
void CollectionSetChooser::verify() {
  guarantee(_length <= regions_length(),
            err_msg("_length: %u regions length: %u", _length, regions_length()));
  guarantee(_curr_index <= _length,
            err_msg("_curr_index: %u _length: %u", _curr_index, _length));
  uint index = 0;
  size_t sum_of_reclaimable_bytes = 0;
  while (index < _curr_index) {
    guarantee(regions_at(index) == NULL,
              "all entries before _curr_index should be NULL");
    index += 1;
  }
  HeapRegion *prev = NULL;
  while (index < _length) {
    HeapRegion *curr = regions_at(index++);
    guarantee(curr != NULL, "Regions in _regions array cannot be NULL");
    guarantee(!curr->is_young(), "should not be young!");
    guarantee(!curr->isHumongous(), "should not be humongous!");
    if (prev != NULL) {
      guarantee(order_regions(prev, curr) != 1,
                err_msg("GC eff prev: %1.4f GC eff curr: %1.4f",
                        prev->gc_efficiency(), curr->gc_efficiency()));
    }
    sum_of_reclaimable_bytes += curr->reclaimable_bytes();
    prev = curr;
  }
  guarantee(sum_of_reclaimable_bytes == _remaining_reclaimable_bytes,
            err_msg("reclaimable bytes inconsistent, "
                    "remaining: " SIZE_FORMAT " sum: " SIZE_FORMAT,
                    _remaining_reclaimable_bytes, sum_of_reclaimable_bytes));
}
#endif // !PRODUCT

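// Illustration of the invariant verify() checks (values hypothetical):
// with _curr_index = 2 and _length = 5 the array looks like
//   [NULL, NULL, r2, r3, r4, ...]
// where r2..r4 are non-young, non-humongous regions in order of
// decreasing GC efficiency, and _remaining_reclaimable_bytes equals the
// sum of reclaimable_bytes() over r2..r4.
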
void CollectionSetChooser::sort_regions() {
  // First trim any unused portion of the top in the parallel case.
  if (_first_par_unreserved_idx > 0) {
    assert(_first_par_unreserved_idx <= regions_length(),
           "Or we didn't reserve enough length");
    regions_trunc_to(_first_par_unreserved_idx);
  }
  _regions.sort(order_regions);
  assert(_length <= regions_length(), "Requirement");
#ifdef ASSERT
  for (uint i = 0; i < _length; i++) {
    assert(regions_at(i) != NULL, "Should be true by sorting!");
  }
#endif // ASSERT
  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
    for (uint i = 0; i < _length; ++i) {
      HeapRegion* r = regions_at(i);
      cl.doHeapRegion(r);
    }
  }
  verify();
}

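// Illustration (hypothetical values): after parallel addition the array
// may contain NULL "waste" slots inside claimed chunks, e.g.
//   [r7, NULL, r3, r9, NULL, NULL]   with _length = 3.
// Sorting with order_regions() both orders the live entries by GC
// efficiency and pushes every NULL slot past _length; assuming r9 is the
// most GC-efficient region, the result is
//   [r9, r7, r3, NULL, NULL, NULL]
// which is why the ASSERT loop above can require the first _length
// entries to be non-NULL.
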
void CollectionSetChooser::add_region(HeapRegion* hr) {
  assert(!hr->isHumongous(),
         "Humongous regions shouldn't be added to the collection set");
  assert(!hr->is_young(), "should not be young!");
  _regions.append(hr);
  _length++;
  _remaining_reclaimable_bytes += hr->reclaimable_bytes();
  hr->calc_gc_efficiency();
}

void CollectionSetChooser::prepare_for_par_region_addition(uint n_regions,
                                                           uint chunk_size) {
  _first_par_unreserved_idx = 0;
  uint n_threads = (uint) ParallelGCThreads;
  if (UseDynamicNumberOfGCThreads) {
    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
           "Should have been set earlier");
    // This is defensive code. As the assertion above says, the number
    // of active threads should be > 0, but in case there is some path
    // or some improperly initialized variable that leads to no
    // active threads, protect against that in a product build.
    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
                     1U);
  }
  uint max_waste = n_threads * chunk_size;
  // It should be aligned with respect to chunk_size.
  uint aligned_n_regions = (n_regions + chunk_size - 1) / chunk_size * chunk_size;
  assert(aligned_n_regions % chunk_size == 0, "should be aligned");
  regions_at_put_grow(aligned_n_regions + max_waste - 1, NULL);
}

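// Worked example of the sizing above (hypothetical values):
//   n_regions = 10, chunk_size = 4, n_threads = 2
//   aligned_n_regions = (10 + 4 - 1) / 4 * 4 = 12
//   max_waste         = 2 * 4               = 8
// so regions_at_put_grow(12 + 8 - 1, NULL) grows the array to 20 slots,
// enough for every thread to claim whole chunks without running off the
// end even in the worst case where each thread wastes a full chunk.
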
uint CollectionSetChooser::claim_array_chunk(uint chunk_size) {
  uint res = (uint) Atomic::add((jint) chunk_size,
                                (volatile jint*) &_first_par_unreserved_idx);
  assert(regions_length() > res + chunk_size - 1,
         "Should already have been expanded");
  return res - chunk_size;
}

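// Example of concurrent claiming (hypothetical interleaving): with
// chunk_size = 4 and _first_par_unreserved_idx starting at 0, the first
// thread's Atomic::add returns the new value 4, so it receives start
// index 0; a second thread's add returns 8, so it receives start index 4.
// Each thread then fills its claimed range [start, start + chunk_size)
// via set_region() below.
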
void CollectionSetChooser::set_region(uint index, HeapRegion* hr) {
  assert(regions_at(index) == NULL, "precondition");
  assert(!hr->is_young(), "should not be young!");
  regions_at_put(index, hr);
  hr->calc_gc_efficiency();
}

void CollectionSetChooser::update_totals(uint region_num,
                                         size_t reclaimable_bytes) {
  // Only take the lock if we actually need to update the totals.
  if (region_num > 0) {
    assert(reclaimable_bytes > 0, "invariant");
    // We could have just used atomics instead of taking the
    // lock. However, we currently don't have an atomic add for size_t.
    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
    _length += region_num;
    _remaining_reclaimable_bytes += reclaimable_bytes;
  } else {
    assert(reclaimable_bytes == 0, "invariant");
  }
}

void CollectionSetChooser::clear() {
  _regions.clear();
  _curr_index = 0;
  _length = 0;
  _remaining_reclaimable_bytes = 0;
}