Mon, 26 Sep 2011 10:24:05 -0700
7081933: Use zeroing elimination optimization for large array
Summary: Don't zero a new typeArray during the slow-path runtime allocation call if the allocation is followed by an arraycopy into it.
Reviewed-by: twisti
1.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Sun Sep 25 16:03:29 2011 -0700 1.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Mon Sep 26 10:24:05 2011 -0700 1.3 @@ -2359,10 +2359,10 @@ 1.4 for (int off = 0; off < 64; off += 16) { 1.5 if (use_prefetch && (off & 31) == 0) { 1.6 if (ArraycopySrcPrefetchDistance > 0) { 1.7 - __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads); 1.8 + __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads); 1.9 } 1.10 if (ArraycopyDstPrefetchDistance > 0) { 1.11 - __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads); 1.12 + __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads); 1.13 } 1.14 } 1.15 __ ldx(from, off+0, O4);
2.1 --- a/src/share/vm/gc_interface/collectedHeap.hpp Sun Sep 25 16:03:29 2011 -0700 2.2 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Mon Sep 26 10:24:05 2011 -0700 2.3 @@ -322,6 +322,7 @@ 2.4 // General obj/array allocation facilities. 2.5 inline static oop obj_allocate(KlassHandle klass, int size, TRAPS); 2.6 inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS); 2.7 + inline static oop array_allocate_nozero(KlassHandle klass, int size, int length, TRAPS); 2.8 2.9 // Special obj/array allocation facilities. 2.10 // Some heaps may want to manage "permanent" data uniquely. These default
3.1 --- a/src/share/vm/gc_interface/collectedHeap.inline.hpp Sun Sep 25 16:03:29 2011 -0700 3.2 +++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp Mon Sep 26 10:24:05 2011 -0700 3.3 @@ -274,6 +274,23 @@ 3.4 return (oop)obj; 3.5 } 3.6 3.7 +oop CollectedHeap::array_allocate_nozero(KlassHandle klass, 3.8 + int size, 3.9 + int length, 3.10 + TRAPS) { 3.11 + debug_only(check_for_valid_allocation_state()); 3.12 + assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed"); 3.13 + assert(size >= 0, "int won't convert to size_t"); 3.14 + HeapWord* obj = common_mem_allocate_noinit(size, CHECK_NULL); 3.15 + ((oop)obj)->set_klass_gap(0); 3.16 + post_allocation_setup_array(klass, obj, size, length); 3.17 +#ifndef PRODUCT 3.18 + const size_t hs = oopDesc::header_size()+1; 3.19 + Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs); 3.20 +#endif 3.21 + return (oop)obj; 3.22 +} 3.23 + 3.24 oop CollectedHeap::permanent_obj_allocate(KlassHandle klass, int size, TRAPS) { 3.25 oop obj = permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL); 3.26 post_allocation_install_obj_klass(klass, obj, size);
4.1 --- a/src/share/vm/memory/oopFactory.cpp Sun Sep 25 16:03:29 2011 -0700 4.2 +++ b/src/share/vm/memory/oopFactory.cpp Mon Sep 26 10:24:05 2011 -0700 4.3 @@ -77,7 +77,14 @@ 4.4 typeArrayOop oopFactory::new_typeArray(BasicType type, int length, TRAPS) { 4.5 klassOop type_asKlassOop = Universe::typeArrayKlassObj(type); 4.6 typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop); 4.7 - typeArrayOop result = type_asArrayKlass->allocate(length, THREAD); 4.8 + typeArrayOop result = type_asArrayKlass->allocate_common(length, true, THREAD); 4.9 + return result; 4.10 +} 4.11 + 4.12 +typeArrayOop oopFactory::new_typeArray_nozero(BasicType type, int length, TRAPS) { 4.13 + klassOop type_asKlassOop = Universe::typeArrayKlassObj(type); 4.14 + typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop); 4.15 + typeArrayOop result = type_asArrayKlass->allocate_common(length, false, THREAD); 4.16 return result; 4.17 } 4.18
5.1 --- a/src/share/vm/memory/oopFactory.hpp Sun Sep 25 16:03:29 2011 -0700 5.2 +++ b/src/share/vm/memory/oopFactory.hpp Mon Sep 26 10:24:05 2011 -0700 5.3 @@ -63,6 +63,7 @@ 5.4 static typeArrayOop new_permanent_intArray (int length, TRAPS); // used for class file structures 5.5 5.6 static typeArrayOop new_typeArray(BasicType type, int length, TRAPS); 5.7 + static typeArrayOop new_typeArray_nozero(BasicType type, int length, TRAPS); 5.8 5.9 // Constant pools 5.10 static constantPoolOop new_constantPool (int length,
6.1 --- a/src/share/vm/oops/typeArrayKlass.cpp Sun Sep 25 16:03:29 2011 -0700 6.2 +++ b/src/share/vm/oops/typeArrayKlass.cpp Mon Sep 26 10:24:05 2011 -0700 6.3 @@ -76,7 +76,7 @@ 6.4 return k(); 6.5 } 6.6 6.7 -typeArrayOop typeArrayKlass::allocate(int length, TRAPS) { 6.8 +typeArrayOop typeArrayKlass::allocate_common(int length, bool do_zero, TRAPS) { 6.9 assert(log2_element_size() >= 0, "bad scale"); 6.10 if (length >= 0) { 6.11 if (length <= max_length()) { 6.12 @@ -84,7 +84,11 @@ 6.13 KlassHandle h_k(THREAD, as_klassOop()); 6.14 typeArrayOop t; 6.15 CollectedHeap* ch = Universe::heap(); 6.16 - t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL); 6.17 + if (do_zero) { 6.18 + t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL); 6.19 + } else { 6.20 + t = (typeArrayOop)CollectedHeap::array_allocate_nozero(h_k, (int)size, length, CHECK_NULL); 6.21 + } 6.22 assert(t->is_parsable(), "Don't publish unless parsable"); 6.23 return t; 6.24 } else {
7.1 --- a/src/share/vm/oops/typeArrayKlass.hpp Sun Sep 25 16:03:29 2011 -0700 7.2 +++ b/src/share/vm/oops/typeArrayKlass.hpp Mon Sep 26 10:24:05 2011 -0700 7.3 @@ -56,7 +56,8 @@ 7.4 bool compute_is_subtype_of(klassOop k); 7.5 7.6 // Allocation 7.7 - typeArrayOop allocate(int length, TRAPS); 7.8 + typeArrayOop allocate_common(int length, bool do_zero, TRAPS); 7.9 + typeArrayOop allocate(int length, TRAPS) { return allocate_common(length, true, THREAD); } 7.10 typeArrayOop allocate_permanent(int length, TRAPS); // used for class file structures 7.11 oop multi_allocate(int rank, jint* sizes, TRAPS); 7.12
8.1 --- a/src/share/vm/opto/library_call.cpp Sun Sep 25 16:03:29 2011 -0700 8.2 +++ b/src/share/vm/opto/library_call.cpp Mon Sep 26 10:24:05 2011 -0700 8.3 @@ -4658,6 +4658,7 @@ 8.4 // "You break it, you buy it." 8.5 InitializeNode* init = alloc->initialization(); 8.6 assert(init->is_complete(), "we just did this"); 8.7 + init->set_complete_with_arraycopy(); 8.8 assert(dest->is_CheckCastPP(), "sanity"); 8.9 assert(dest->in(0)->in(0) == init, "dest pinned"); 8.10 adr_type = TypeRawPtr::BOTTOM; // all initializations are into raw memory
9.1 --- a/src/share/vm/opto/macro.cpp Sun Sep 25 16:03:29 2011 -0700 9.2 +++ b/src/share/vm/opto/macro.cpp Mon Sep 26 10:24:05 2011 -0700 9.3 @@ -1685,9 +1685,21 @@ 9.4 9.5 void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) { 9.6 Node* length = alloc->in(AllocateNode::ALength); 9.7 + InitializeNode* init = alloc->initialization(); 9.8 + Node* klass_node = alloc->in(AllocateNode::KlassNode); 9.9 + ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass(); 9.10 + address slow_call_address; // Address of slow call 9.11 + if (init != NULL && init->is_complete_with_arraycopy() && 9.12 + k->is_type_array_klass()) { 9.13 + // Don't zero type array during slow allocation in VM since 9.14 + // it will be initialized later by arraycopy in compiled code. 9.15 + slow_call_address = OptoRuntime::new_array_nozero_Java(); 9.16 + } else { 9.17 + slow_call_address = OptoRuntime::new_array_Java(); 9.18 + } 9.19 expand_allocate_common(alloc, length, 9.20 OptoRuntime::new_array_Type(), 9.21 - OptoRuntime::new_array_Java()); 9.22 + slow_call_address); 9.23 } 9.24 9.25 //-----------------------mark_eliminated_locking_nodes-----------------------
10.1 --- a/src/share/vm/opto/memnode.cpp Sun Sep 25 16:03:29 2011 -0700 10.2 +++ b/src/share/vm/opto/memnode.cpp Mon Sep 26 10:24:05 2011 -0700 10.3 @@ -2847,7 +2847,7 @@ 10.4 10.5 //---------------------------InitializeNode------------------------------------ 10.6 InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) 10.7 - : _is_complete(false), 10.8 + : _is_complete(Incomplete), 10.9 MemBarNode(C, adr_type, rawoop) 10.10 { 10.11 init_class_id(Class_Initialize); 10.12 @@ -2885,7 +2885,7 @@ 10.13 10.14 void InitializeNode::set_complete(PhaseGVN* phase) { 10.15 assert(!is_complete(), "caller responsibility"); 10.16 - _is_complete = true; 10.17 + _is_complete = Complete; 10.18 10.19 // After this node is complete, it contains a bunch of 10.20 // raw-memory initializations. There is no need for
11.1 --- a/src/share/vm/opto/memnode.hpp Sun Sep 25 16:03:29 2011 -0700 11.2 +++ b/src/share/vm/opto/memnode.hpp Mon Sep 26 10:24:05 2011 -0700 11.3 @@ -942,7 +942,12 @@ 11.4 class InitializeNode: public MemBarNode { 11.5 friend class AllocateNode; 11.6 11.7 - bool _is_complete; 11.8 + enum { 11.9 + Incomplete = 0, 11.10 + Complete = 1, 11.11 + WithArraycopy = 2 11.12 + }; 11.13 + int _is_complete; 11.14 11.15 public: 11.16 enum { 11.17 @@ -976,10 +981,12 @@ 11.18 // An InitializeNode must completed before macro expansion is done. 11.19 // Completion requires that the AllocateNode must be followed by 11.20 // initialization of the new memory to zero, then to any initializers. 11.21 - bool is_complete() { return _is_complete; } 11.22 + bool is_complete() { return _is_complete != Incomplete; } 11.23 + bool is_complete_with_arraycopy() { return (_is_complete & WithArraycopy) != 0; } 11.24 11.25 // Mark complete. (Must not yet be complete.) 11.26 void set_complete(PhaseGVN* phase); 11.27 + void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; } 11.28 11.29 #ifdef ASSERT 11.30 // ensure all non-degenerate stores are ordered and non-overlapping
12.1 --- a/src/share/vm/opto/runtime.cpp Sun Sep 25 16:03:29 2011 -0700 12.2 +++ b/src/share/vm/opto/runtime.cpp Mon Sep 26 10:24:05 2011 -0700 12.3 @@ -102,6 +102,7 @@ 12.4 // Compiled code entry points 12.5 address OptoRuntime::_new_instance_Java = NULL; 12.6 address OptoRuntime::_new_array_Java = NULL; 12.7 +address OptoRuntime::_new_array_nozero_Java = NULL; 12.8 address OptoRuntime::_multianewarray2_Java = NULL; 12.9 address OptoRuntime::_multianewarray3_Java = NULL; 12.10 address OptoRuntime::_multianewarray4_Java = NULL; 12.11 @@ -151,6 +152,7 @@ 12.12 // ------------------------------------------------------------------------------------------------------------------------------- 12.13 gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false); 12.14 gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false); 12.15 + gen(env, _new_array_nozero_Java , new_array_Type , new_array_nozero_C , 0 , true , false, false); 12.16 gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false); 12.17 gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false); 12.18 gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); 12.19 @@ -308,6 +310,36 @@ 12.20 } 12.21 JRT_END 12.22 12.23 +// array allocation without zeroing 12.24 +JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(klassOopDesc* array_type, int len, JavaThread *thread)) 12.25 + JRT_BLOCK; 12.26 +#ifndef PRODUCT 12.27 + SharedRuntime::_new_array_ctr++; // new array requires GC 12.28 +#endif 12.29 + assert(check_compiled_frame(thread), "incorrect caller"); 12.30 + 12.31 + // Scavenge and allocate an instance. 12.32 + oop result; 12.33 + 12.34 + assert(Klass::cast(array_type)->oop_is_typeArray(), "should be called only for type array"); 12.35 + // The oopFactory likes to work with the element type. 
12.36 + BasicType elem_type = typeArrayKlass::cast(array_type)->element_type(); 12.37 + result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD); 12.38 + 12.39 + // Pass oops back through thread local storage. Our apparent type to Java 12.40 + // is that we return an oop, but we can block on exit from this routine and 12.41 + // a GC can trash the oop in C's return register. The generated stub will 12.42 + // fetch the oop from TLS after any possible GC. 12.43 + deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION); 12.44 + thread->set_vm_result(result); 12.45 + JRT_BLOCK_END; 12.46 + 12.47 + if (GraphKit::use_ReduceInitialCardMarks()) { 12.48 + // inform GC that we won't do card marks for initializing writes. 12.49 + new_store_pre_barrier(thread); 12.50 + } 12.51 +JRT_END 12.52 + 12.53 // Note: multianewarray for one dimension is handled inline by GraphKit::new_array. 12.54 12.55 // multianewarray for 2 dimensions
13.1 --- a/src/share/vm/opto/runtime.hpp Sun Sep 25 16:03:29 2011 -0700 13.2 +++ b/src/share/vm/opto/runtime.hpp Mon Sep 26 10:24:05 2011 -0700 13.3 @@ -114,6 +114,7 @@ 13.4 // References to generated stubs 13.5 static address _new_instance_Java; 13.6 static address _new_array_Java; 13.7 + static address _new_array_nozero_Java; 13.8 static address _multianewarray2_Java; 13.9 static address _multianewarray3_Java; 13.10 static address _multianewarray4_Java; 13.11 @@ -143,6 +144,7 @@ 13.12 13.13 // Allocate storage for a objArray or typeArray 13.14 static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread); 13.15 + static void new_array_nozero_C(klassOopDesc* array_klass, int len, JavaThread *thread); 13.16 13.17 // Post-slow-path-allocation, pre-initializing-stores step for 13.18 // implementing ReduceInitialCardMarks 13.19 @@ -208,6 +210,7 @@ 13.20 // access to runtime stubs entry points for java code 13.21 static address new_instance_Java() { return _new_instance_Java; } 13.22 static address new_array_Java() { return _new_array_Java; } 13.23 + static address new_array_nozero_Java() { return _new_array_nozero_Java; } 13.24 static address multianewarray2_Java() { return _multianewarray2_Java; } 13.25 static address multianewarray3_Java() { return _multianewarray3_Java; } 13.26 static address multianewarray4_Java() { return _multianewarray4_Java; }