Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
1 /*
2 * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
26 #include "precompiled.hpp"
27 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
28 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
29 #include "gc_implementation/g1/g1Log.hpp"
30 #include "gc_implementation/g1/g1StringDedup.hpp"
32 // Helper class for avoiding interleaved logging
33 class LineBuffer: public StackObj {
35 private:
36 static const int BUFFER_LEN = 1024;
37 static const int INDENT_CHARS = 3;
38 char _buffer[BUFFER_LEN];
39 int _indent_level;
40 int _cur;
42 void vappend(const char* format, va_list ap) ATTRIBUTE_PRINTF(2, 0) {
43 int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
44 if (res != -1) {
45 _cur += res;
46 } else {
47 DEBUG_ONLY(warning("buffer too small in LineBuffer");)
48 _buffer[BUFFER_LEN -1] = 0;
49 _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
50 }
51 }
53 public:
54 explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
55 for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
56 _buffer[_cur] = ' ';
57 }
58 }
60 #ifndef PRODUCT
61 ~LineBuffer() {
62 assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
63 }
64 #endif
66 void append(const char* format, ...) ATTRIBUTE_PRINTF(2, 3) {
67 va_list ap;
68 va_start(ap, format);
69 vappend(format, ap);
70 va_end(ap);
71 }
73 void append_and_print_cr(const char* format, ...) ATTRIBUTE_PRINTF(2, 3) {
74 va_list ap;
75 va_start(ap, format);
76 vappend(format, ap);
77 va_end(ap);
78 gclog_or_tty->print_cr("%s", _buffer);
79 _cur = _indent_level * INDENT_CHARS;
80 }
81 };
83 PRAGMA_DIAG_PUSH
84 PRAGMA_FORMAT_NONLITERAL_IGNORED
85 template <class T>
86 void WorkerDataArray<T>::print(int level, const char* title) {
87 if (_length == 1) {
88 // No need for min, max, average and sum for only one worker
89 LineBuffer buf(level);
90 buf.append("[%s: ", title);
91 buf.append(_print_format, _data[0]);
92 buf.append_and_print_cr("]");
93 return;
94 }
96 T min = _data[0];
97 T max = _data[0];
98 T sum = 0;
100 LineBuffer buf(level);
101 buf.append("[%s:", title);
102 for (uint i = 0; i < _length; ++i) {
103 T val = _data[i];
104 min = MIN2(val, min);
105 max = MAX2(val, max);
106 sum += val;
107 if (G1Log::finest()) {
108 buf.append(" ");
109 buf.append(_print_format, val);
110 }
111 }
113 if (G1Log::finest()) {
114 buf.append_and_print_cr("%s", "");
115 }
117 double avg = (double)sum / (double)_length;
118 buf.append(" Min: ");
119 buf.append(_print_format, min);
120 buf.append(", Avg: ");
121 buf.append("%.1lf", avg); // Always print average as a double
122 buf.append(", Max: ");
123 buf.append(_print_format, max);
124 buf.append(", Diff: ");
125 buf.append(_print_format, max - min);
126 if (_print_sum) {
127 // for things like the start and end times the sum is not
128 // that relevant
129 buf.append(", Sum: ");
130 buf.append(_print_format, sum);
131 }
132 buf.append_and_print_cr("]");
133 }
134 PRAGMA_DIAG_POP
136 #ifndef PRODUCT
138 template <> const int WorkerDataArray<int>::_uninitialized = -1;
139 template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
140 template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
142 template <class T>
143 void WorkerDataArray<T>::reset() {
144 for (uint i = 0; i < _length; i++) {
145 _data[i] = (T)_uninitialized;
146 }
147 }
149 template <class T>
150 void WorkerDataArray<T>::verify() {
151 for (uint i = 0; i < _length; i++) {
152 assert(_data[i] != _uninitialized,
153 err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf",
154 i, (double)_data[i], (double)_uninitialized));
155 }
156 }
158 #endif
160 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
161 _max_gc_threads(max_gc_threads),
162 _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
163 _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
164 _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
165 _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
166 _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
167 _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
168 _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
169 _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
170 _last_termination_times_ms(_max_gc_threads, "%.1lf"),
171 _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
172 _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
173 _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
174 _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
175 _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
176 _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
177 _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
178 _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
179 {
180 assert(max_gc_threads > 0, "Must have some GC threads");
181 }
183 void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
184 assert(active_gc_threads > 0, "The number of threads must be > 0");
185 assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
186 _active_gc_threads = active_gc_threads;
188 _last_gc_worker_start_times_ms.reset();
189 _last_ext_root_scan_times_ms.reset();
190 _last_satb_filtering_times_ms.reset();
191 _last_update_rs_times_ms.reset();
192 _last_update_rs_processed_buffers.reset();
193 _last_scan_rs_times_ms.reset();
194 _last_strong_code_root_scan_times_ms.reset();
195 _last_obj_copy_times_ms.reset();
196 _last_termination_times_ms.reset();
197 _last_termination_attempts.reset();
198 _last_gc_worker_end_times_ms.reset();
199 _last_gc_worker_times_ms.reset();
200 _last_gc_worker_other_times_ms.reset();
202 _last_redirty_logged_cards_time_ms.reset();
203 _last_redirty_logged_cards_processed_cards.reset();
205 }
207 void G1GCPhaseTimes::note_gc_end() {
208 _last_gc_worker_start_times_ms.verify();
209 _last_ext_root_scan_times_ms.verify();
210 _last_satb_filtering_times_ms.verify();
211 _last_update_rs_times_ms.verify();
212 _last_update_rs_processed_buffers.verify();
213 _last_scan_rs_times_ms.verify();
214 _last_strong_code_root_scan_times_ms.verify();
215 _last_obj_copy_times_ms.verify();
216 _last_termination_times_ms.verify();
217 _last_termination_attempts.verify();
218 _last_gc_worker_end_times_ms.verify();
220 for (uint i = 0; i < _active_gc_threads; i++) {
221 double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
222 _last_gc_worker_times_ms.set(i, worker_time);
224 double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
225 _last_satb_filtering_times_ms.get(i) +
226 _last_update_rs_times_ms.get(i) +
227 _last_scan_rs_times_ms.get(i) +
228 _last_strong_code_root_scan_times_ms.get(i) +
229 _last_obj_copy_times_ms.get(i) +
230 _last_termination_times_ms.get(i);
232 double worker_other_time = worker_time - worker_known_time;
233 _last_gc_worker_other_times_ms.set(i, worker_other_time);
234 }
236 _last_gc_worker_times_ms.verify();
237 _last_gc_worker_other_times_ms.verify();
239 _last_redirty_logged_cards_time_ms.verify();
240 _last_redirty_logged_cards_processed_cards.verify();
241 }
243 void G1GCPhaseTimes::note_string_dedup_fixup_start() {
244 _cur_string_dedup_queue_fixup_worker_times_ms.reset();
245 _cur_string_dedup_table_fixup_worker_times_ms.reset();
246 }
248 void G1GCPhaseTimes::note_string_dedup_fixup_end() {
249 _cur_string_dedup_queue_fixup_worker_times_ms.verify();
250 _cur_string_dedup_table_fixup_worker_times_ms.verify();
251 }
253 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
254 LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
255 }
257 void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
258 LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
259 }
261 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
262 LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
263 }
265 double G1GCPhaseTimes::accounted_time_ms() {
266 // Subtract the root region scanning wait time. It's initialized to
267 // zero at the start of the pause.
268 double misc_time_ms = _root_region_scan_wait_time_ms;
270 misc_time_ms += _cur_collection_par_time_ms;
272 // Now subtract the time taken to fix up roots in generated code
273 misc_time_ms += _cur_collection_code_root_fixup_time_ms;
275 // Strong code root purge time
276 misc_time_ms += _cur_strong_code_root_purge_time_ms;
278 if (G1StringDedup::is_enabled()) {
279 // String dedup fixup time
280 misc_time_ms += _cur_string_dedup_fixup_time_ms;
281 }
283 // Subtract the time taken to clean the card table from the
284 // current value of "other time"
285 misc_time_ms += _cur_clear_ct_time_ms;
287 return misc_time_ms;
288 }
290 void G1GCPhaseTimes::print(double pause_time_sec) {
291 if (_root_region_scan_wait_time_ms > 0.0) {
292 print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
293 }
294 if (G1CollectedHeap::use_parallel_gc_threads()) {
295 print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
296 _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
297 _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
298 if (_last_satb_filtering_times_ms.sum() > 0.0) {
299 _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
300 }
301 _last_update_rs_times_ms.print(2, "Update RS (ms)");
302 _last_update_rs_processed_buffers.print(3, "Processed Buffers");
303 _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
304 _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
305 _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
306 _last_termination_times_ms.print(2, "Termination (ms)");
307 if (G1Log::finest()) {
308 _last_termination_attempts.print(3, "Termination Attempts");
309 }
310 _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
311 _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
312 _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
313 } else {
314 _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
315 if (_last_satb_filtering_times_ms.sum() > 0.0) {
316 _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
317 }
318 _last_update_rs_times_ms.print(1, "Update RS (ms)");
319 _last_update_rs_processed_buffers.print(2, "Processed Buffers");
320 _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
321 _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
322 _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
323 }
324 print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
325 print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
326 if (G1StringDedup::is_enabled()) {
327 print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
328 _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
329 _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
330 }
331 print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
332 double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
333 print_stats(1, "Other", misc_time_ms);
334 if (_cur_verify_before_time_ms > 0.0) {
335 print_stats(2, "Verify Before", _cur_verify_before_time_ms);
336 }
337 if (G1CollectedHeap::heap()->evacuation_failed()) {
338 double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards +
339 _cur_evac_fail_restore_remsets;
340 print_stats(2, "Evacuation Failure", evac_fail_handling);
341 if (G1Log::finest()) {
342 print_stats(3, "Recalculate Used", _cur_evac_fail_recalc_used);
343 print_stats(3, "Remove Self Forwards", _cur_evac_fail_remove_self_forwards);
344 print_stats(3, "Restore RemSet", _cur_evac_fail_restore_remsets);
345 }
346 }
347 print_stats(2, "Choose CSet",
348 (_recorded_young_cset_choice_time_ms +
349 _recorded_non_young_cset_choice_time_ms));
350 print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
351 print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
352 print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
353 if (G1Log::finest()) {
354 _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
355 _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
356 }
357 if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
358 print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
359 if (G1Log::finest()) {
360 print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
361 print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
362 print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
363 }
364 }
365 print_stats(2, "Free CSet",
366 (_recorded_young_free_cset_time_ms +
367 _recorded_non_young_free_cset_time_ms));
368 if (G1Log::finest()) {
369 print_stats(3, "Young Free CSet", _recorded_young_free_cset_time_ms);
370 print_stats(3, "Non-Young Free CSet", _recorded_non_young_free_cset_time_ms);
371 }
372 if (_cur_verify_after_time_ms > 0.0) {
373 print_stats(2, "Verify After", _cur_verify_after_time_ms);
374 }
375 }