Fri, 26 Aug 2011 08:52:22 -0700
7059037: Use BIS for zeroing on T4
Summary: Use BIS for zeroing newly allocated big (2KB and larger) objects and arrays.
Reviewed-by: never, twisti, ysr
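Note: the BIS (block-initializing store) code itself lands in the SPARC back end; the file below is the shared CollectedHeap code on the allocation path it feeds. For orientation, here is a minimal, hypothetical sketch of the size-gated dispatch the summary describes. The feature test and threshold names are illustrative stand-ins, not this changeset's API, and memset stands in for the T4 BIS stub:

    #include <cstddef>
    #include <cstring>

    // Hypothetical stand-ins: the real gating is a CPU feature test plus a
    // product flag in the SPARC port.
    static const bool   kHasBlockInitStore    = true;
    static const size_t kBlockZeroingLowLimit = 2048;  // bytes; "2KB and larger"

    // Zero a newly allocated body, choosing the BIS-style path for big blocks.
    void zero_object_body(void* base, size_t byte_size) {
      if (kHasBlockInitStore && byte_size >= kBlockZeroingLowLimit) {
        // BIS path: on T4 the stub writes whole cache lines without first
        // reading them, avoiding read-for-ownership traffic on big objects.
        memset(base, 0, byte_size);  // stand-in for the BIS zeroing stub
      } else {
        // Small objects: plain word stores; the BIS setup would not pay off.
        size_t* p = static_cast<size_t*>(base);
        for (size_t i = 0; i < byte_size / sizeof(size_t); ++i) {
          p[i] = 0;
        }
      }
    }
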
/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/init.hpp"
#include "services/heapDumper.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "thread_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "thread_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "thread_windows.inline.hpp"
#endif

#ifdef ASSERT
int CollectedHeap::_fire_out_of_memory_count = 0;
#endif

size_t CollectedHeap::_filler_array_max_size = 0;

// Memory state functions.

CollectedHeap::CollectedHeap() : _n_par_threads(0)
{
  const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT));
  const size_t elements_per_word = HeapWordSize / sizeof(jint);
  // Divide the element count by elements_per_word to get a word count.
  _filler_array_max_size = align_object_size(filler_array_hdr_size() +
                                             max_len / elements_per_word);

  _barrier_set = NULL;
  _is_gc_active = false;
  _total_collections = _total_full_collections = 0;
  _gc_cause = _gc_lastcause = GCCause::_no_gc;
  NOT_PRODUCT(_promotion_failure_alot_count = 0;)
  NOT_PRODUCT(_promotion_failure_alot_gc_number = 0;)

  if (UsePerfData) {
    EXCEPTION_MARK;

    // create the gc cause jvmstat counters
    _perf_gc_cause = PerfDataManager::create_string_variable(SUN_GC, "cause",
                               80, GCCause::to_string(_gc_cause), CHECK);

    _perf_gc_lastcause =
      PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                               80, GCCause::to_string(_gc_lastcause), CHECK);
  }
  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
}

void CollectedHeap::pre_initialize() {
  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
  // otherwise remains unused.
#ifdef COMPILER2
  _defer_initial_card_mark = ReduceInitialCardMarks && can_elide_tlab_store_barriers()
                             && (DeferInitialCardMark || card_mark_must_follow_store());
#else
  assert(_defer_initial_card_mark == false, "Who would set it?");
#endif
}

#ifndef PRODUCT
void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
  if (CheckMemoryInitialization && ZapUnusedHeapArea) {
    for (size_t slot = 0; slot < size; slot += 1) {
      assert((*(intptr_t*) (addr + slot)) != ((intptr_t) badHeapWordVal),
             "Found badHeapWordValue in post-allocation check");
    }
  }
}

void CollectedHeap::check_for_non_bad_heap_word_value(HeapWord* addr, size_t size) {
  if (CheckMemoryInitialization && ZapUnusedHeapArea) {
    for (size_t slot = 0; slot < size; slot += 1) {
      assert((*(intptr_t*) (addr + slot)) == ((intptr_t) badHeapWordVal),
             "Found non badHeapWordValue in pre-allocation check");
    }
  }
}
#endif // PRODUCT
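
// As the assert messages above suggest, these checks pair up around debug-build
// allocation (call sites live in the inline allocation path): memory about to
// be handed out is expected to still carry the badHeapWordVal zap pattern
// (pre-allocation check), and a freshly initialized object must no longer
// contain it (post-allocation check). Treat this pairing as a sketch of
// intent rather than an exhaustive list of callers.
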
#ifdef ASSERT
void CollectedHeap::check_for_valid_allocation_state() {
  Thread *thread = Thread::current();
  // How to choose between a pending exception and a potential
  // OutOfMemoryError?  Don't allow pending exceptions.
  // This is a VM policy failure, so how do we exhaustively test it?
  assert(!thread->has_pending_exception(),
         "shouldn't be allocating with pending exception");
  if (StrictSafepointChecks) {
    assert(thread->allow_allocation(),
           "Allocation done by thread for which allocation is blocked "
           "by No_Allocation_Verifier!");
    // Allocation of an oop can always invoke a safepoint,
    // hence, the true argument
    thread->check_for_valid_safepoint_state(true);
  }
}
#endif
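
// Sketch of how the allow_allocation() assert above fires: a scope that must
// not allocate enters a No_Allocation_Verifier (named in the message above),
// which makes thread->allow_allocation() return false; any heap allocation
// attempted inside that scope then trips the StrictSafepointChecks assert.
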
HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  if (thread->tlab().free() > thread->tlab().refill_waste_limit()) {
    thread->tlab().record_slow_allocation(size);
    return NULL;
  }

  // Discard tlab and allocate a new one.
  // To minimize fragmentation, the last TLAB may be smaller than the rest.
  size_t new_tlab_size = thread->tlab().compute_size(size);

  thread->tlab().clear_before_allocation();

  if (new_tlab_size == 0) {
    return NULL;
  }

  // Allocate a new TLAB...
  HeapWord* obj = Universe::heap()->allocate_new_tlab(new_tlab_size);
  if (obj == NULL) {
    return NULL;
  }
  if (ZeroTLAB) {
    // ..and clear it.
    Copy::zero_to_words(obj, new_tlab_size);
  } else {
    // ...and zap just allocated object.
#ifdef ASSERT
    // Skip mangling the space corresponding to the object header to
    // ensure that the returned space is not considered parsable by
    // any concurrent GC thread.
    size_t hdr_size = oopDesc::header_size();
    Copy::fill_to_words(obj + hdr_size, new_tlab_size - hdr_size, badHeapWordVal);
#endif // ASSERT
  }
  thread->tlab().fill(obj, obj + size, new_tlab_size);
  return obj;
}
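
// For context, the fast path that falls into allocate_from_tlab_slow() above
// is roughly the following (the real inline lives in collectedHeap.inline.hpp;
// take the exact shape as a sketch, not a quote):
//
//   HeapWord* obj = thread->tlab().allocate(size);   // bump-pointer hit
//   if (obj != NULL) return obj;                     // common case
//   return allocate_from_tlab_slow(thread, size);    // refill or give up
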
void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
  MemRegion deferred = thread->deferred_card_mark();
  if (!deferred.is_empty()) {
    assert(_defer_initial_card_mark, "Otherwise should be empty");
    {
      // Verify that the storage points to a parsable object in heap
      DEBUG_ONLY(oop old_obj = oop(deferred.start());)
      assert(is_in(old_obj), "Not in allocated heap");
      assert(!can_elide_initializing_store_barrier(old_obj),
             "Else should have been filtered in new_store_pre_barrier()");
      assert(!is_in_permanent(old_obj), "Sanity: not expected");
      assert(old_obj->is_oop(true), "Not an oop");
      assert(old_obj->is_parsable(), "Will not be concurrently parsable");
      assert(deferred.word_size() == (size_t)(old_obj->size()),
             "Mismatch: multiple objects?");
    }
    BarrierSet* bs = barrier_set();
    assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
    bs->write_region(deferred);
    // "Clear" the deferred_card_mark field
    thread->set_deferred_card_mark(MemRegion());
  }
  assert(thread->deferred_card_mark().is_empty(), "invariant");
}

// Helper for ReduceInitialCardMarks. For performance,
// compiled code may elide card-marks for initializing stores
// to a newly allocated object along the fast-path. We
// compensate for such elided card-marks as follows:
// (a) Generational, non-concurrent collectors, such as
//     GenCollectedHeap(ParNew,DefNew,Tenured) and
//     ParallelScavengeHeap(ParallelGC, ParallelOldGC)
//     need the card-mark if and only if the region is
//     in the old gen, and do not care if the card-mark
//     succeeds or precedes the initializing stores themselves,
//     so long as the card-mark is completed before the next
//     scavenge. For all these cases, we can do a card mark
//     at the point at which we do a slow path allocation
//     in the old gen, i.e. in this call.
// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
//     in addition that the card-mark for an old gen allocated
//     object strictly follow any associated initializing stores.
//     In these cases, the memRegion remembered below is
//     used to card-mark the entire region either just before the next
//     slow-path allocation by this thread or just before the next scavenge or
//     CMS-associated safepoint, whichever of these events happens first.
//     (The implicit assumption is that the object has been fully
//     initialized by this point, a fact that we assert when doing the
//     card-mark.)
// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When
//     G1 concurrent marking is in progress, an SATB (pre-write-) barrier
//     is used to remember the pre-value of any store. Initializing
//     stores will not need this barrier, so we need not worry about
//     compensating for the missing pre-barrier here. Turning now
//     to the post-barrier, we note that G1 needs an RS update barrier
//     which simply enqueues a (sequence of) dirty cards which may
//     optionally be refined by the concurrent update threads. Note
//     that this barrier need only be applied to a non-young write,
//     but, like in CMS, because of the presence of concurrent refinement
//     (much like CMS' precleaning), must strictly follow the oop-store.
//     Thus, using the same protocol for maintaining the intended
//     invariants turns out, serendipitously, to be the same for both
//     G1 and CMS.
//
// For any future collector, this code should be reexamined with
// that specific collector in mind, and the documentation above suitably
// extended and updated.
oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
  // If a previous card-mark was deferred, flush it now.
  flush_deferred_store_barrier(thread);
  if (can_elide_initializing_store_barrier(new_obj)) {
    // The deferred_card_mark region should be empty
    // following the flush above.
    assert(thread->deferred_card_mark().is_empty(), "Error");
  } else {
    MemRegion mr((HeapWord*)new_obj, new_obj->size());
    assert(!mr.is_empty(), "Error");
    if (_defer_initial_card_mark) {
      // Defer the card mark
      thread->set_deferred_card_mark(mr);
    } else {
      // Do the card mark
      BarrierSet* bs = barrier_set();
      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
      bs->write_region(mr);
    }
  }
  return new_obj;
}
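
// Putting the pieces together for the deferred (CMS-like) case, as a sketch:
// compiled code allocates an object in the old gen with elided card-marks;
// new_store_pre_barrier() above records [obj, obj+size) in the thread's
// deferred_card_mark; flush_deferred_store_barrier() then card-marks the
// whole region at this thread's next slow-path allocation or at the next
// safepoint (see ensure_parsability() below), whichever comes first,
// strictly after the initializing stores.
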
size_t CollectedHeap::filler_array_hdr_size() {
  return size_t(align_object_offset(arrayOopDesc::header_size(T_INT))); // align to Long
}

size_t CollectedHeap::filler_array_min_size() {
  return align_object_size(filler_array_hdr_size()); // align to MinObjAlignment
}

size_t CollectedHeap::filler_array_max_size() {
  return _filler_array_max_size;
}
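
// For scale: on a 64-bit VM, max_array_length(T_INT) is about 2^31 jints and
// elements_per_word is 2, so _filler_array_max_size works out to roughly
// 2^30 words -- the "~8G" single-array limit mentioned in fill_with_objects()
// below. (Back-of-the-envelope, not a quoted constant.)
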
#ifdef ASSERT
void CollectedHeap::fill_args_check(HeapWord* start, size_t words)
{
  assert(words >= min_fill_size(), "too small to fill");
  assert(words % MinObjAlignment == 0, "unaligned size");
  assert(Universe::heap()->is_in_reserved(start), "not in heap");
  assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap");
}

void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap)
{
  if (ZapFillerObjects && zap) {
    Copy::fill_to_words(start + filler_array_hdr_size(),
                        words - filler_array_hdr_size(), 0XDEAFBABE);
  }
}
#endif // ASSERT
void
CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap)
{
  assert(words >= filler_array_min_size(), "too small for an array");
  assert(words <= filler_array_max_size(), "too big for a single object");

  const size_t payload_size = words - filler_array_hdr_size();
  const size_t len = payload_size * HeapWordSize / sizeof(jint);

  // Set the length first for concurrent GC: a concurrent heap walker sizes
  // an array from its length field, so the length must be valid before the
  // klass is installed and the filler becomes visible as an int array.
  ((arrayOop)start)->set_length((int)len);
  post_allocation_setup_common(Universe::intArrayKlassObj(), start, words);
  DEBUG_ONLY(zap_filler_array(start, words, zap);)
}
void
CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap)
{
  assert(words <= filler_array_max_size(), "too big for a single object");

  if (words >= filler_array_min_size()) {
    fill_with_array(start, words, zap);
  } else if (words > 0) {
    assert(words == min_fill_size(), "unaligned size");
    post_allocation_setup_common(SystemDictionary::Object_klass(), start,
                                 words);
  }
}

void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap)
{
  DEBUG_ONLY(fill_args_check(start, words);)
  HandleMark hm;  // Free handles before leaving.
  fill_with_object_impl(start, words, zap);
}
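
// A typical client of these fillers is TLAB retirement: make_parsable()
// (invoked from ensure_parsability() below) plugs the unused tail of each
// TLAB with a filler object so heap walkers can iterate past it.
// Illustrative shape of such a call, with hypothetical bounds:
//
//   CollectedHeap::fill_with_object(top, end - top);  // fill dead space
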
void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap)
{
  DEBUG_ONLY(fill_args_check(start, words);)
  HandleMark hm;  // Free handles before leaving.

#ifdef _LP64
  // A single array can fill ~8G, so multiple objects are needed only in 64-bit.
  // First fill with arrays, ensuring that any remaining space is big enough to
  // fill.  The remainder is filled with a single object.
  const size_t min = min_fill_size();
  const size_t max = filler_array_max_size();
  while (words > max) {
    // Take a full max-size array unless that would leave a remainder smaller
    // than min; in that case take max - min words so the tail stays fillable.
    const size_t cur = words - max >= min ? max : max - min;
    fill_with_array(start, cur, zap);
    start += cur;
    words -= cur;
  }
#endif

  fill_with_object_impl(start, words, zap);
}
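
// Worked example for the filler loop above, with illustrative sizes min == 2
// and max == 100 (in words): filling 150 words takes a full max == 100 array
// (150 - 100 >= 2), leaving 50 for the final object; filling 101 words takes
// max - min == 98 instead (101 - 100 < 2), so the 3-word tail is still at
// least min and can be filled.
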
HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
  guarantee(false, "thread-local allocation buffers not supported");
  return NULL;
}

void CollectedHeap::ensure_parsability(bool retire_tlabs) {
  // The second disjunct in the assertion below makes a concession
  // for the start-up verification done while the VM is being
  // created. Callers should make sure that mutators aren't going to
  // interfere -- for instance, this is permissible if we are still
  // single-threaded and have either not yet started allocating
  // (nothing much to verify) or we have started allocating but are
  // now a full-fledged JavaThread (and have thus made our TLABs
  // available for filling).
  assert(SafepointSynchronize::is_at_safepoint() ||
         !is_init_completed(),
         "Should only be called at a safepoint or at start-up,"
         " otherwise concurrent mutator activity may make the heap"
         " unparsable again");
  const bool use_tlab = UseTLAB;
  const bool deferred = _defer_initial_card_mark;
  // The main thread starts allocating via a TLAB even before it
  // has added itself to the threads list at vm boot-up.
  assert(!use_tlab || Threads::first() != NULL,
         "Attempt to fill tlabs before main thread has been added"
         " to threads list is doomed to failure!");
  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
    if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
#ifdef COMPILER2
    // The deferred store barriers must all have been flushed to the
    // card-table (or other remembered set structure) before GC starts
    // processing the card-table (or other remembered set).
    if (deferred) flush_deferred_store_barrier(thread);
#else
    assert(!deferred, "Should be false");
    assert(thread->deferred_card_mark().is_empty(), "Should be empty");
#endif
  }
}

void CollectedHeap::accumulate_statistics_all_tlabs() {
  if (UseTLAB) {
    assert(SafepointSynchronize::is_at_safepoint() ||
           !is_init_completed(),
           "should only accumulate statistics on tlabs at safepoint");

    ThreadLocalAllocBuffer::accumulate_statistics_before_gc();
  }
}

void CollectedHeap::resize_all_tlabs() {
  if (UseTLAB) {
    assert(SafepointSynchronize::is_at_safepoint() ||
           !is_init_completed(),
           "should only resize tlabs at safepoint");

    ThreadLocalAllocBuffer::resize_all_tlabs();
  }
}

void CollectedHeap::pre_full_gc_dump() {
  if (HeapDumpBeforeFullGC) {
    TraceTime tt("Heap Dump: ", PrintGCDetails, false, gclog_or_tty);
    // We are doing a "major" collection, and a heap dump before
    // the major collection has been requested.
    HeapDumper::dump_heap();
  }
  if (PrintClassHistogramBeforeFullGC) {
    TraceTime tt("Class Histogram: ", PrintGCDetails, true, gclog_or_tty);
    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
    inspector.doit();
  }
}

void CollectedHeap::post_full_gc_dump() {
  if (HeapDumpAfterFullGC) {
    TraceTime tt("Heap Dump", PrintGCDetails, false, gclog_or_tty);
    HeapDumper::dump_heap();
  }
  if (PrintClassHistogramAfterFullGC) {
    TraceTime tt("Class Histogram", PrintGCDetails, true, gclog_or_tty);
    VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */);
    inspector.doit();
  }
}