Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
1 /*
2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
27 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
28 #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
29 #include "oops/oop.inline.hpp"
30 #include "oops/oop.pcgc.inline.hpp"
31 #include "runtime/prefetch.inline.hpp"
// Constructs the per-GC-worker scratch state for one evacuation pause:
// wires up this worker's task queue, dirty-card queue and scan closure,
// and allocates the per-age "surviving young words" counters.
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
  : _g1h(g1h),
    _refs(g1h->task_queue(queue_num)),
    _dcq(&g1h->dirty_card_queue_set()),
    _ct_bs(g1h->g1_barrier_set()),
    _g1_rem(g1h->g1_rem_set()),
    _hash_seed(17), _queue_num(queue_num),
    _term_attempts(0),
    _age_table(false), _scanner(g1h, rp),
    _strong_roots_time(0), _term_time(0) {
  _scanner.set_par_scan_thread_state(this);
  // we allocate G1YoungSurvRateNumRegions plus one entries, since
  // we "sacrifice" entry 0 to keep track of surviving bytes for
  // non-young regions (where the age is -1)
  // We also add a few elements at the beginning and at the end in
  // an attempt to eliminate cache contention
  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
  uint array_length = PADDING_ELEM_NUM +
                      real_length +
                      PADDING_ELEM_NUM;
  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
  // NOTE(review): NEW_C_HEAP_ARRAY typically exits the VM on failure already,
  // so this NULL check may be redundant — confirm against allocation.hpp.
  if (_surviving_young_words_base == NULL)
    vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
                          "Not enough space for young surv histo.");
  // Skip the leading padding; indexing happens off the unpadded pointer.
  _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
  // Only the real entries need zeroing; padding is never read.
  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));

  _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h);

  // Timestamp used by elapsed_time() for the termination statistics.
  _start = os::elapsedTime();
}
G1ParScanThreadState::~G1ParScanThreadState() {
  // Retire any outstanding PLABs before the allocator is destroyed.
  _g1_par_allocator->retire_alloc_buffers();
  delete _g1_par_allocator;
  // Free from the padded base pointer, not the offset _surviving_young_words.
  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC);
}
71 void
72 G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st)
73 {
74 st->print_raw_cr("GC Termination Stats");
75 st->print_raw_cr(" elapsed --strong roots-- -------termination-------"
76 " ------waste (KiB)------");
77 st->print_raw_cr("thr ms ms % ms % attempts"
78 " total alloc undo");
79 st->print_raw_cr("--- --------- --------- ------ --------- ------ --------"
80 " ------- ------- -------");
81 }
83 void
84 G1ParScanThreadState::print_termination_stats(int i,
85 outputStream* const st) const
86 {
87 const double elapsed_ms = elapsed_time() * 1000.0;
88 const double s_roots_ms = strong_roots_time() * 1000.0;
89 const double term_ms = term_time() * 1000.0;
90 const size_t alloc_buffer_waste = _g1_par_allocator->alloc_buffer_waste();
91 const size_t undo_waste = _g1_par_allocator->undo_waste();
92 st->print_cr("%3d %9.2f %9.2f %6.2f "
93 "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
94 SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
95 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
96 term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
97 (alloc_buffer_waste + undo_waste) * HeapWordSize / K,
98 alloc_buffer_waste * HeapWordSize / K,
99 undo_waste * HeapWordSize / K);
100 }
102 #ifdef ASSERT
103 bool G1ParScanThreadState::verify_ref(narrowOop* ref) const {
104 assert(ref != NULL, "invariant");
105 assert(UseCompressedOops, "sanity");
106 assert(!has_partial_array_mask(ref), err_msg("ref=" PTR_FORMAT, p2i(ref)));
107 oop p = oopDesc::load_decode_heap_oop(ref);
108 assert(_g1h->is_in_g1_reserved(p),
109 err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
110 return true;
111 }
113 bool G1ParScanThreadState::verify_ref(oop* ref) const {
114 assert(ref != NULL, "invariant");
115 if (has_partial_array_mask(ref)) {
116 // Must be in the collection set--it's already been copied.
117 oop p = clear_partial_array_mask(ref);
118 assert(_g1h->obj_in_cs(p),
119 err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
120 } else {
121 oop p = oopDesc::load_decode_heap_oop(ref);
122 assert(_g1h->is_in_g1_reserved(p),
123 err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
124 }
125 return true;
126 }
128 bool G1ParScanThreadState::verify_task(StarTask ref) const {
129 if (ref.is_narrow()) {
130 return verify_ref((narrowOop*) ref);
131 } else {
132 return verify_ref((oop*) ref);
133 }
134 }
135 #endif // ASSERT
137 void G1ParScanThreadState::trim_queue() {
138 assert(_evac_failure_cl != NULL, "not set");
140 StarTask ref;
141 do {
142 // Drain the overflow stack first, so other threads can steal.
143 while (_refs->pop_overflow(ref)) {
144 dispatch_reference(ref);
145 }
147 while (_refs->pop_local(ref)) {
148 dispatch_reference(ref);
149 }
150 } while (!_refs->is_empty());
151 }
// Evacuates `old` out of the collection set: allocates to-space, copies the
// object, atomically installs the forwarding pointer, updates its age and
// the surviving-young-words accounting, and queues its contents for
// scanning. Returns the new location of the object — either our copy, the
// copy another worker won the race with, or `old` itself on evacuation
// failure (forward-to-self).
oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
  size_t word_sz = old->size();
  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
  // +1 to make the -1 indexes valid...
  int young_index = from_region->young_index_in_cset()+1;
  assert( (from_region->is_young() && young_index > 0) ||
          (!from_region->is_young() && young_index == 0), "invariant" );
  G1CollectorPolicy* g1p = _g1h->g1_policy();
  markOop m = old->mark();
  // If the mark word is displaced (e.g. locked), the age lives in the
  // displaced mark; read it from the right place.
  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
                                           : m->age();
  // Pick survivor vs. tenured destination based on source region and age.
  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                             word_sz);
  AllocationContext_t context = from_region->allocation_context();
  HeapWord* obj_ptr = _g1_par_allocator->allocate(alloc_purpose, word_sz, context);
#ifndef PRODUCT
  // Should this evacuation fail?
  if (_g1h->evacuation_should_fail()) {
    if (obj_ptr != NULL) {
      // Injected failure for testing: give back the allocation and fall
      // through to the evacuation-failure path below.
      _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
      obj_ptr = NULL;
    }
  }
#endif // !PRODUCT

  if (obj_ptr == NULL) {
    // This will either forward-to-self, or detect that someone else has
    // installed a forwarding pointer.
    return _g1h->handle_evacuation_failure_par(this, old);
  }

  oop obj = oop(obj_ptr);

  // We're going to allocate linearly, so might as well prefetch ahead.
  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);

  // Race with other workers to claim the object; only the winner
  // (forward_ptr == NULL) copies and finishes initialization.
  oop forward_ptr = old->forward_to_atomic(obj);
  if (forward_ptr == NULL) {
    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);

    // alloc_purpose is just a hint to allocate() above, recheck the type of region
    // we actually allocated from and update alloc_purpose accordingly
    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;

    if (g1p->track_object_age(alloc_purpose)) {
      // We could simply do obj->incr_age(). However, this causes a
      // performance issue. obj->incr_age() will first check whether
      // the object has a displaced mark by checking its mark word;
      // getting the mark word from the new location of the object
      // stalls. So, given that we already have the mark word and we
      // are about to install it anyway, it's better to increase the
      // age on the mark word, when the object does not have a
      // displaced mark word. We're not expecting many objects to have
      // a displaced marked word, so that case is not optimized
      // further (it could be...) and we simply call obj->incr_age().

      if (m->has_displaced_mark_helper()) {
        // in this case, we have to install the mark word first,
        // otherwise obj looks to be forwarded (the old mark word,
        // which contains the forward pointer, was copied)
        obj->set_mark(m);
        obj->incr_age();
      } else {
        m = m->incr_age();
        obj->set_mark(m);
      }
      age_table()->add(obj, word_sz);
    } else {
      obj->set_mark(m);
    }

    if (G1StringDedup::is_enabled()) {
      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
                                             to_region->is_young(),
                                             queue_num(),
                                             obj);
    }

    // Account the surviving bytes against the source region's age bucket
    // (index 0 is the shared non-young bucket).
    size_t* surv_young_words = surviving_young_words();
    surv_young_words[young_index] += word_sz;

    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
      // We keep track of the next start index in the length field of
      // the to-space object. The actual length can be found in the
      // length field of the from-space object.
      arrayOop(obj)->set_length(0);
      // Push a masked pointer so the array is scanned in chunks rather
      // than all at once.
      oop* old_p = set_partial_array_mask(old);
      push_on_queue(old_p);
    } else {
      // No point in using the slower heap_region_containing() method,
      // given that we know obj is in the heap.
      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
      obj->oop_iterate_backwards(&_scanner);
    }
  } else {
    // Lost the race: undo our allocation and return the winner's copy.
    _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
    obj = forward_ptr;
  }
  return obj;
}