Mon, 26 Sep 2011 10:24:05 -0700
7081933: Use zeroing elimination optimization for large array
Summary: Don't zero new typeArray during runtime call if the allocation is followed by arraycopy into it.
Reviewed-by: twisti
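For context, a brief sketch of the Java-level pattern this change targets (the
class and method names below are illustrative only, not part of the changeset):
a newly allocated primitive array that is immediately and fully overwritten by
an arraycopy, so the zeroing otherwise performed during the slow-path runtime
allocation of the array is redundant.

// Hypothetical example of the targeted pattern: dst is a new typeArray that is
// filled by System.arraycopy right after allocation, so clearing it in the
// runtime allocation call is wasted work.
public class ArrayCopyPattern {
    static int[] duplicate(int[] src) {
        int[] dst = new int[src.length];               // large int[] allocation
        System.arraycopy(src, 0, dst, 0, src.length);  // immediately copied into
        return dst;
    }

    public static void main(String[] args) {
        int[] big = duplicate(new int[1 << 20]);
        System.out.println(big.length);
    }
}
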
/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/init.hpp"
#include "services/heapDumper.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "thread_windows.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_bsd
# include "thread_bsd.inline.hpp"
#endif

#ifdef ASSERT
int CollectedHeap::_fire_out_of_memory_count = 0;
#endif

size_t CollectedHeap::_filler_array_max_size = 0;

// Memory state functions.


CollectedHeap::CollectedHeap() : _n_par_threads(0)
{
  const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT));
  const size_t elements_per_word = HeapWordSize / sizeof(jint);
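  // Cache the largest filler array size, in heap words: the int[] header
  // plus the maximum array length converted from jint elements to words.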
  _filler_array_max_size = align_object_size(filler_array_hdr_size() +
                                             max_len / elements_per_word);

  _barrier_set = NULL;
  _is_gc_active = false;
  _total_collections = _total_full_collections = 0;
  _gc_cause = _gc_lastcause = GCCause::_no_gc;
  NOT_PRODUCT(_promotion_failure_alot_count = 0;)
  NOT_PRODUCT(_promotion_failure_alot_gc_number = 0;)

  if (UsePerfData) {
    EXCEPTION_MARK;

    // create the gc cause jvmstat counters
    _perf_gc_cause = PerfDataManager::create_string_variable(SUN_GC, "cause",
                             80, GCCause::to_string(_gc_cause), CHECK);

    _perf_gc_lastcause =
        PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                             80, GCCause::to_string(_gc_lastcause), CHECK);
  }
  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
}

void CollectedHeap::pre_initialize() {
  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
  // otherwise remains unused.
#ifdef COMPILER2
  _defer_initial_card_mark = ReduceInitialCardMarks && can_elide_tlab_store_barriers()
                             && (DeferInitialCardMark || card_mark_must_follow_store());
#else
  assert(_defer_initial_card_mark == false, "Who would set it?");
#endif
}

#ifndef PRODUCT
void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
  if (CheckMemoryInitialization && ZapUnusedHeapArea) {
    for (size_t slot = 0; slot < size; slot += 1) {
      assert((*(intptr_t*) (addr + slot)) != ((intptr_t) badHeapWordVal),
             "Found badHeapWordValue in post-allocation check");
    }
  }
}

void CollectedHeap::check_for_non_bad_heap_word_value(HeapWord* addr, size_t size) {
  if (CheckMemoryInitialization && ZapUnusedHeapArea) {
    for (size_t slot = 0; slot < size; slot += 1) {
      assert((*(intptr_t*) (addr + slot)) == ((intptr_t) badHeapWordVal),
             "Found non badHeapWordValue in pre-allocation check");
    }
  }
}
#endif // PRODUCT

#ifdef ASSERT
void CollectedHeap::check_for_valid_allocation_state() {
  Thread *thread = Thread::current();
  // How to choose between a pending exception and a potential
  // OutOfMemoryError? Don't allow pending exceptions.
  // This is a VM policy failure, so how do we exhaustively test it?
  assert(!thread->has_pending_exception(),
         "shouldn't be allocating with pending exception");
  if (StrictSafepointChecks) {
    assert(thread->allow_allocation(),
           "Allocation done by thread for which allocation is blocked "
           "by No_Allocation_Verifier!");
    // Allocation of an oop can always invoke a safepoint,
    // hence, the true argument
    thread->check_for_valid_safepoint_state(true);
  }
}
#endif

HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  if (thread->tlab().free() > thread->tlab().refill_waste_limit()) {
    thread->tlab().record_slow_allocation(size);
    return NULL;
  }

  // Discard tlab and allocate a new one.
  // To minimize fragmentation, the last TLAB may be smaller than the rest.
  size_t new_tlab_size = thread->tlab().compute_size(size);

  thread->tlab().clear_before_allocation();

  if (new_tlab_size == 0) {
    return NULL;
  }

  // Allocate a new TLAB...
  HeapWord* obj = Universe::heap()->allocate_new_tlab(new_tlab_size);
  if (obj == NULL) {
    return NULL;
  }
  if (ZeroTLAB) {
    // ..and clear it.
    Copy::zero_to_words(obj, new_tlab_size);
  } else {
    // ...and zap just allocated object.
#ifdef ASSERT
    // Skip mangling the space corresponding to the object header to
    // ensure that the returned space is not considered parsable by
    // any concurrent GC thread.
    size_t hdr_size = oopDesc::header_size();
    Copy::fill_to_words(obj + hdr_size, new_tlab_size - hdr_size, badHeapWordVal);
#endif // ASSERT
  }
  thread->tlab().fill(obj, obj + size, new_tlab_size);
  return obj;
}

void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
  MemRegion deferred = thread->deferred_card_mark();
  if (!deferred.is_empty()) {
    assert(_defer_initial_card_mark, "Otherwise should be empty");
    {
      // Verify that the storage points to a parsable object in heap
      DEBUG_ONLY(oop old_obj = oop(deferred.start());)
      assert(is_in(old_obj), "Not in allocated heap");
      assert(!can_elide_initializing_store_barrier(old_obj),
             "Else should have been filtered in new_store_pre_barrier()");
      assert(!is_in_permanent(old_obj), "Sanity: not expected");
      assert(old_obj->is_oop(true), "Not an oop");
      assert(old_obj->is_parsable(), "Will not be concurrently parsable");
      assert(deferred.word_size() == (size_t)(old_obj->size()),
             "Mismatch: multiple objects?");
    }
    BarrierSet* bs = barrier_set();
    assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
    bs->write_region(deferred);
    // "Clear" the deferred_card_mark field
    thread->set_deferred_card_mark(MemRegion());
  }
  assert(thread->deferred_card_mark().is_empty(), "invariant");
}

// Helper for ReduceInitialCardMarks. For performance,
// compiled code may elide card-marks for initializing stores
// to a newly allocated object along the fast-path. We
// compensate for such elided card-marks as follows:
// (a) Generational, non-concurrent collectors, such as
//     GenCollectedHeap(ParNew,DefNew,Tenured) and
//     ParallelScavengeHeap(ParallelGC, ParallelOldGC)
//     need the card-mark if and only if the region is
//     in the old gen, and do not care if the card-mark
//     succeeds or precedes the initializing stores themselves,
//     so long as the card-mark is completed before the next
//     scavenge. For all these cases, we can do a card mark
//     at the point at which we do a slow path allocation
//     in the old gen, i.e. in this call.
// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
//     in addition that the card-mark for an old gen allocated
//     object strictly follow any associated initializing stores.
//     In these cases, the memRegion remembered below is
//     used to card-mark the entire region either just before the next
//     slow-path allocation by this thread or just before the next scavenge or
//     CMS-associated safepoint, whichever of these events happens first.
//     (The implicit assumption is that the object has been fully
//     initialized by this point, a fact that we assert when doing the
//     card-mark.)
// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When
//     G1 concurrent marking is in progress, an SATB (pre-write-)barrier
//     is used to remember the pre-value of any store. Initializing
//     stores will not need this barrier, so we need not worry about
//     compensating for the missing pre-barrier here. Turning now
//     to the post-barrier, we note that G1 needs an RS update barrier
//     which simply enqueues a (sequence of) dirty cards which may
//     optionally be refined by the concurrent update threads. Note
//     that this barrier need only be applied to a non-young write,
//     but, like in CMS, because of the presence of concurrent refinement
//     (much like CMS' precleaning), must strictly follow the oop-store.
//     Thus the protocol for maintaining the intended invariants turns
//     out, serendipitously, to be the same for both G1 and CMS.
//
// For any future collector, this code should be reexamined with
// that specific collector in mind, and the documentation above suitably
// extended and updated.
oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
  // If a previous card-mark was deferred, flush it now.
  flush_deferred_store_barrier(thread);
  if (can_elide_initializing_store_barrier(new_obj)) {
    // The deferred_card_mark region should be empty
    // following the flush above.
    assert(thread->deferred_card_mark().is_empty(), "Error");
  } else {
    MemRegion mr((HeapWord*)new_obj, new_obj->size());
    assert(!mr.is_empty(), "Error");
    if (_defer_initial_card_mark) {
      // Defer the card mark
      thread->set_deferred_card_mark(mr);
    } else {
      // Do the card mark
      BarrierSet* bs = barrier_set();
      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
      bs->write_region(mr);
    }
  }
  return new_obj;
}

size_t CollectedHeap::filler_array_hdr_size() {
  return size_t(align_object_offset(arrayOopDesc::header_size(T_INT))); // align to Long
}

size_t CollectedHeap::filler_array_min_size() {
  return align_object_size(filler_array_hdr_size()); // align to MinObjAlignment
}

size_t CollectedHeap::filler_array_max_size() {
  return _filler_array_max_size;
}

#ifdef ASSERT
void CollectedHeap::fill_args_check(HeapWord* start, size_t words)
{
  assert(words >= min_fill_size(), "too small to fill");
  assert(words % MinObjAlignment == 0, "unaligned size");
  assert(Universe::heap()->is_in_reserved(start), "not in heap");
  assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap");
}

void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap)
{
  if (ZapFillerObjects && zap) {
    Copy::fill_to_words(start + filler_array_hdr_size(),
                        words - filler_array_hdr_size(), 0XDEAFBABE);
  }
}
#endif // ASSERT

void
CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap)
{
  assert(words >= filler_array_min_size(), "too small for an array");
  assert(words <= filler_array_max_size(), "too big for a single object");

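  // The filler is laid out as an int[]: convert the payload size from
  // heap words to a jint element count for the array length.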
  const size_t payload_size = words - filler_array_hdr_size();
  const size_t len = payload_size * HeapWordSize / sizeof(jint);

  // Set the length first for concurrent GC.
  ((arrayOop)start)->set_length((int)len);
  post_allocation_setup_common(Universe::intArrayKlassObj(), start, words);
  DEBUG_ONLY(zap_filler_array(start, words, zap);)
}

void
CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap)
{
  assert(words <= filler_array_max_size(), "too big for a single object");

  if (words >= filler_array_min_size()) {
    fill_with_array(start, words, zap);
  } else if (words > 0) {
    assert(words == min_fill_size(), "unaligned size");
    post_allocation_setup_common(SystemDictionary::Object_klass(), start,
                                 words);
  }
}

void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap)
{
  DEBUG_ONLY(fill_args_check(start, words);)
  HandleMark hm;  // Free handles before leaving.
  fill_with_object_impl(start, words, zap);
}

void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap)
{
  DEBUG_ONLY(fill_args_check(start, words);)
  HandleMark hm;  // Free handles before leaving.

#ifdef _LP64
  // A single array can fill ~8G, so multiple objects are needed only in 64-bit.
  // First fill with arrays, ensuring that any remaining space is big enough to
  // fill. The remainder is filled with a single object.
  const size_t min = min_fill_size();
  const size_t max = filler_array_max_size();
  while (words > max) {
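    // If filling a full max-size array here would leave fewer than
    // min_fill_size() words behind, fill max - min words instead so the
    // leftover space can still be covered by a filler object.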
    const size_t cur = words - max >= min ? max : max - min;
    fill_with_array(start, cur, zap);
    start += cur;
    words -= cur;
  }
#endif

  fill_with_object_impl(start, words, zap);
}

HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
  guarantee(false, "thread-local allocation buffers not supported");
  return NULL;
}

void CollectedHeap::ensure_parsability(bool retire_tlabs) {
  // The second disjunct in the assertion below makes a concession
  // for the start-up verification done while the VM is being
  // created. Callers should be careful to ensure that mutators
  // aren't going to interfere -- for instance, this is permissible
  // if we are still single-threaded and have either not yet
  // started allocating (nothing much to verify) or we have
  // started allocating but are now a full-fledged JavaThread
  // (and have thus made our TLABs available for filling).
  assert(SafepointSynchronize::is_at_safepoint() ||
         !is_init_completed(),
         "Should only be called at a safepoint or at start-up"
         " otherwise concurrent mutator activity may make heap "
         " unparsable again");
  const bool use_tlab = UseTLAB;
  const bool deferred = _defer_initial_card_mark;
  // The main thread starts allocating via a TLAB even before it
  // has added itself to the threads list at vm boot-up.
  assert(!use_tlab || Threads::first() != NULL,
         "Attempt to fill tlabs before main thread has been added"
         " to threads list is doomed to failure!");
  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
    if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
#ifdef COMPILER2
    // The deferred store barriers must all have been flushed to the
    // card-table (or other remembered set structure) before GC starts
    // processing the card-table (or other remembered set).
    if (deferred) flush_deferred_store_barrier(thread);
#else
    assert(!deferred, "Should be false");
    assert(thread->deferred_card_mark().is_empty(), "Should be empty");
#endif
  }
}

void CollectedHeap::accumulate_statistics_all_tlabs() {
  if (UseTLAB) {
    assert(SafepointSynchronize::is_at_safepoint() ||
           !is_init_completed(),
           "should only accumulate statistics on tlabs at safepoint");

    ThreadLocalAllocBuffer::accumulate_statistics_before_gc();
  }
}

void CollectedHeap::resize_all_tlabs() {
  if (UseTLAB) {
    assert(SafepointSynchronize::is_at_safepoint() ||
           !is_init_completed(),
           "should only resize tlabs at safepoint");

    ThreadLocalAllocBuffer::resize_all_tlabs();
  }
}

void CollectedHeap::pre_full_gc_dump() {
  if (HeapDumpBeforeFullGC) {
    TraceTime tt("Heap Dump (before full gc): ", PrintGCDetails, false, gclog_or_tty);
    // We are doing a "major" collection and a heap dump before
    // major collection has been requested.
    HeapDumper::dump_heap();
  }
  if (PrintClassHistogramBeforeFullGC) {
    TraceTime tt("Class Histogram (before full gc): ", PrintGCDetails, true, gclog_or_tty);
    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
    inspector.doit();
  }
}

void CollectedHeap::post_full_gc_dump() {
  if (HeapDumpAfterFullGC) {
    TraceTime tt("Heap Dump (after full gc): ", PrintGCDetails, false, gclog_or_tty);
    HeapDumper::dump_heap();
  }
  if (PrintClassHistogramAfterFullGC) {
    TraceTime tt("Class Histogram (after full gc): ", PrintGCDetails, true, gclog_or_tty);
    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
    inspector.doit();
  }
}