7081933: Use zeroing elimination optimization for large array

Mon, 26 Sep 2011 10:24:05 -0700

author
kvn
date
Mon, 26 Sep 2011 10:24:05 -0700
changeset 3157
a92cdbac8b9e
parent 3156
f08d439fab8c
child 3158
cb315dc80374

7081933: Use zeroing elimination optimization for large array
Summary: Don't zero new typeArray during runtime call if the allocation is followed by arraycopy into it.
Reviewed-by: twisti

src/cpu/sparc/vm/stubGenerator_sparc.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_interface/collectedHeap.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_interface/collectedHeap.inline.hpp file | annotate | diff | comparison | revisions
src/share/vm/memory/oopFactory.cpp file | annotate | diff | comparison | revisions
src/share/vm/memory/oopFactory.hpp file | annotate | diff | comparison | revisions
src/share/vm/oops/typeArrayKlass.cpp file | annotate | diff | comparison | revisions
src/share/vm/oops/typeArrayKlass.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/macro.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/memnode.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/memnode.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/runtime.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Sun Sep 25 16:03:29 2011 -0700
     1.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Sep 26 10:24:05 2011 -0700
     1.3 @@ -2359,10 +2359,10 @@
     1.4      for (int off = 0; off < 64; off += 16) {
     1.5        if (use_prefetch && (off & 31) == 0) {
     1.6          if (ArraycopySrcPrefetchDistance > 0) {
     1.7 -          __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
     1.8 +          __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
     1.9          }
    1.10          if (ArraycopyDstPrefetchDistance > 0) {
    1.11 -          __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
    1.12 +          __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads);
    1.13          }
    1.14        }
    1.15        __ ldx(from,  off+0, O4);
     2.1 --- a/src/share/vm/gc_interface/collectedHeap.hpp	Sun Sep 25 16:03:29 2011 -0700
     2.2 +++ b/src/share/vm/gc_interface/collectedHeap.hpp	Mon Sep 26 10:24:05 2011 -0700
     2.3 @@ -322,6 +322,7 @@
     2.4    // General obj/array allocation facilities.
     2.5    inline static oop obj_allocate(KlassHandle klass, int size, TRAPS);
     2.6    inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS);
     2.7 +  inline static oop array_allocate_nozero(KlassHandle klass, int size, int length, TRAPS);
     2.8  
     2.9    // Special obj/array allocation facilities.
    2.10    // Some heaps may want to manage "permanent" data uniquely. These default
     3.1 --- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Sun Sep 25 16:03:29 2011 -0700
     3.2 +++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Mon Sep 26 10:24:05 2011 -0700
     3.3 @@ -274,6 +274,23 @@
     3.4    return (oop)obj;
     3.5  }
     3.6  
     3.7 +oop CollectedHeap::array_allocate_nozero(KlassHandle klass,
     3.8 +                                         int size,
     3.9 +                                         int length,
    3.10 +                                         TRAPS) {
    3.11 +  debug_only(check_for_valid_allocation_state());
    3.12 +  assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
    3.13 +  assert(size >= 0, "int won't convert to size_t");
    3.14 +  HeapWord* obj = common_mem_allocate_noinit(size, CHECK_NULL);
    3.15 +  ((oop)obj)->set_klass_gap(0);
    3.16 +  post_allocation_setup_array(klass, obj, size, length);
    3.17 +#ifndef PRODUCT
    3.18 +  const size_t hs = oopDesc::header_size()+1;
    3.19 +  Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs);
    3.20 +#endif
    3.21 +  return (oop)obj;
    3.22 +}
    3.23 +
    3.24  oop CollectedHeap::permanent_obj_allocate(KlassHandle klass, int size, TRAPS) {
    3.25    oop obj = permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
    3.26    post_allocation_install_obj_klass(klass, obj, size);
     4.1 --- a/src/share/vm/memory/oopFactory.cpp	Sun Sep 25 16:03:29 2011 -0700
     4.2 +++ b/src/share/vm/memory/oopFactory.cpp	Mon Sep 26 10:24:05 2011 -0700
     4.3 @@ -77,7 +77,14 @@
     4.4  typeArrayOop oopFactory::new_typeArray(BasicType type, int length, TRAPS) {
     4.5    klassOop type_asKlassOop = Universe::typeArrayKlassObj(type);
     4.6    typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop);
     4.7 -  typeArrayOop result = type_asArrayKlass->allocate(length, THREAD);
     4.8 +  typeArrayOop result = type_asArrayKlass->allocate_common(length, true, THREAD);
     4.9 +  return result;
    4.10 +}
    4.11 +
    4.12 +typeArrayOop oopFactory::new_typeArray_nozero(BasicType type, int length, TRAPS) {
    4.13 +  klassOop type_asKlassOop = Universe::typeArrayKlassObj(type);
    4.14 +  typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop);
    4.15 +  typeArrayOop result = type_asArrayKlass->allocate_common(length, false, THREAD);
    4.16    return result;
    4.17  }
    4.18  
     5.1 --- a/src/share/vm/memory/oopFactory.hpp	Sun Sep 25 16:03:29 2011 -0700
     5.2 +++ b/src/share/vm/memory/oopFactory.hpp	Mon Sep 26 10:24:05 2011 -0700
     5.3 @@ -63,6 +63,7 @@
     5.4    static typeArrayOop    new_permanent_intArray  (int length, TRAPS);  // used for class file structures
     5.5  
     5.6    static typeArrayOop    new_typeArray(BasicType type, int length, TRAPS);
     5.7 +  static typeArrayOop    new_typeArray_nozero(BasicType type, int length, TRAPS);
     5.8  
     5.9    // Constant pools
    5.10    static constantPoolOop      new_constantPool     (int length,
     6.1 --- a/src/share/vm/oops/typeArrayKlass.cpp	Sun Sep 25 16:03:29 2011 -0700
     6.2 +++ b/src/share/vm/oops/typeArrayKlass.cpp	Mon Sep 26 10:24:05 2011 -0700
     6.3 @@ -76,7 +76,7 @@
     6.4    return k();
     6.5  }
     6.6  
     6.7 -typeArrayOop typeArrayKlass::allocate(int length, TRAPS) {
     6.8 +typeArrayOop typeArrayKlass::allocate_common(int length, bool do_zero, TRAPS) {
     6.9    assert(log2_element_size() >= 0, "bad scale");
    6.10    if (length >= 0) {
    6.11      if (length <= max_length()) {
    6.12 @@ -84,7 +84,11 @@
    6.13        KlassHandle h_k(THREAD, as_klassOop());
    6.14        typeArrayOop t;
    6.15        CollectedHeap* ch = Universe::heap();
    6.16 -      t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
    6.17 +      if (do_zero) {
    6.18 +        t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
    6.19 +      } else {
    6.20 +        t = (typeArrayOop)CollectedHeap::array_allocate_nozero(h_k, (int)size, length, CHECK_NULL);
    6.21 +      }
    6.22        assert(t->is_parsable(), "Don't publish unless parsable");
    6.23        return t;
    6.24      } else {
     7.1 --- a/src/share/vm/oops/typeArrayKlass.hpp	Sun Sep 25 16:03:29 2011 -0700
     7.2 +++ b/src/share/vm/oops/typeArrayKlass.hpp	Mon Sep 26 10:24:05 2011 -0700
     7.3 @@ -56,7 +56,8 @@
     7.4    bool compute_is_subtype_of(klassOop k);
     7.5  
     7.6    // Allocation
     7.7 -  typeArrayOop allocate(int length, TRAPS);
     7.8 +  typeArrayOop allocate_common(int length, bool do_zero, TRAPS);
     7.9 +  typeArrayOop allocate(int length, TRAPS) { return allocate_common(length, true, THREAD); }
    7.10    typeArrayOop allocate_permanent(int length, TRAPS);  // used for class file structures
    7.11    oop multi_allocate(int rank, jint* sizes, TRAPS);
    7.12  
     8.1 --- a/src/share/vm/opto/library_call.cpp	Sun Sep 25 16:03:29 2011 -0700
     8.2 +++ b/src/share/vm/opto/library_call.cpp	Mon Sep 26 10:24:05 2011 -0700
     8.3 @@ -4658,6 +4658,7 @@
     8.4      // "You break it, you buy it."
     8.5      InitializeNode* init = alloc->initialization();
     8.6      assert(init->is_complete(), "we just did this");
     8.7 +    init->set_complete_with_arraycopy();
     8.8      assert(dest->is_CheckCastPP(), "sanity");
     8.9      assert(dest->in(0)->in(0) == init, "dest pinned");
    8.10      adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
     9.1 --- a/src/share/vm/opto/macro.cpp	Sun Sep 25 16:03:29 2011 -0700
     9.2 +++ b/src/share/vm/opto/macro.cpp	Mon Sep 26 10:24:05 2011 -0700
     9.3 @@ -1685,9 +1685,21 @@
     9.4  
     9.5  void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
     9.6    Node* length = alloc->in(AllocateNode::ALength);
     9.7 +  InitializeNode* init = alloc->initialization();
     9.8 +  Node* klass_node = alloc->in(AllocateNode::KlassNode);
     9.9 +  ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
    9.10 +  address slow_call_address;  // Address of slow call
    9.11 +  if (init != NULL && init->is_complete_with_arraycopy() &&
    9.12 +      k->is_type_array_klass()) {
    9.13 +    // Don't zero type array during slow allocation in VM since
    9.14 +    // it will be initialized later by arraycopy in compiled code.
    9.15 +    slow_call_address = OptoRuntime::new_array_nozero_Java();
    9.16 +  } else {
    9.17 +    slow_call_address = OptoRuntime::new_array_Java();
    9.18 +  }
    9.19    expand_allocate_common(alloc, length,
    9.20                           OptoRuntime::new_array_Type(),
    9.21 -                         OptoRuntime::new_array_Java());
    9.22 +                         slow_call_address);
    9.23  }
    9.24  
    9.25  //-----------------------mark_eliminated_locking_nodes-----------------------
    10.1 --- a/src/share/vm/opto/memnode.cpp	Sun Sep 25 16:03:29 2011 -0700
    10.2 +++ b/src/share/vm/opto/memnode.cpp	Mon Sep 26 10:24:05 2011 -0700
    10.3 @@ -2847,7 +2847,7 @@
    10.4  
    10.5  //---------------------------InitializeNode------------------------------------
    10.6  InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
    10.7 -  : _is_complete(false),
    10.8 +  : _is_complete(Incomplete),
    10.9      MemBarNode(C, adr_type, rawoop)
   10.10  {
   10.11    init_class_id(Class_Initialize);
   10.12 @@ -2885,7 +2885,7 @@
   10.13  
   10.14  void InitializeNode::set_complete(PhaseGVN* phase) {
   10.15    assert(!is_complete(), "caller responsibility");
   10.16 -  _is_complete = true;
   10.17 +  _is_complete = Complete;
   10.18  
   10.19    // After this node is complete, it contains a bunch of
   10.20    // raw-memory initializations.  There is no need for
    11.1 --- a/src/share/vm/opto/memnode.hpp	Sun Sep 25 16:03:29 2011 -0700
    11.2 +++ b/src/share/vm/opto/memnode.hpp	Mon Sep 26 10:24:05 2011 -0700
    11.3 @@ -942,7 +942,12 @@
    11.4  class InitializeNode: public MemBarNode {
    11.5    friend class AllocateNode;
    11.6  
    11.7 -  bool _is_complete;
    11.8 +  enum {
    11.9 +    Incomplete    = 0,
   11.10 +    Complete      = 1,
   11.11 +    WithArraycopy = 2
   11.12 +  };
   11.13 +  int _is_complete;
   11.14  
   11.15  public:
   11.16    enum {
   11.17 @@ -976,10 +981,12 @@
    11.18    // An InitializeNode must be completed before macro expansion is done.
   11.19    // Completion requires that the AllocateNode must be followed by
   11.20    // initialization of the new memory to zero, then to any initializers.
   11.21 -  bool is_complete() { return _is_complete; }
   11.22 +  bool is_complete() { return _is_complete != Incomplete; }
   11.23 +  bool is_complete_with_arraycopy() { return (_is_complete & WithArraycopy) != 0; }
   11.24  
   11.25    // Mark complete.  (Must not yet be complete.)
   11.26    void set_complete(PhaseGVN* phase);
   11.27 +  void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
   11.28  
   11.29  #ifdef ASSERT
   11.30    // ensure all non-degenerate stores are ordered and non-overlapping
    12.1 --- a/src/share/vm/opto/runtime.cpp	Sun Sep 25 16:03:29 2011 -0700
    12.2 +++ b/src/share/vm/opto/runtime.cpp	Mon Sep 26 10:24:05 2011 -0700
    12.3 @@ -102,6 +102,7 @@
    12.4  // Compiled code entry points
    12.5  address OptoRuntime::_new_instance_Java                           = NULL;
    12.6  address OptoRuntime::_new_array_Java                              = NULL;
    12.7 +address OptoRuntime::_new_array_nozero_Java                       = NULL;
    12.8  address OptoRuntime::_multianewarray2_Java                        = NULL;
    12.9  address OptoRuntime::_multianewarray3_Java                        = NULL;
   12.10  address OptoRuntime::_multianewarray4_Java                        = NULL;
   12.11 @@ -151,6 +152,7 @@
   12.12    // -------------------------------------------------------------------------------------------------------------------------------
   12.13    gen(env, _new_instance_Java              , new_instance_Type            , new_instance_C                  ,    0 , true , false, false);
   12.14    gen(env, _new_array_Java                 , new_array_Type               , new_array_C                     ,    0 , true , false, false);
   12.15 +  gen(env, _new_array_nozero_Java          , new_array_Type               , new_array_nozero_C              ,    0 , true , false, false);
   12.16    gen(env, _multianewarray2_Java           , multianewarray2_Type         , multianewarray2_C               ,    0 , true , false, false);
   12.17    gen(env, _multianewarray3_Java           , multianewarray3_Type         , multianewarray3_C               ,    0 , true , false, false);
   12.18    gen(env, _multianewarray4_Java           , multianewarray4_Type         , multianewarray4_C               ,    0 , true , false, false);
   12.19 @@ -308,6 +310,36 @@
   12.20    }
   12.21  JRT_END
   12.22  
   12.23 +// array allocation without zeroing
   12.24 +JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(klassOopDesc* array_type, int len, JavaThread *thread))
   12.25 +  JRT_BLOCK;
   12.26 +#ifndef PRODUCT
   12.27 +  SharedRuntime::_new_array_ctr++;            // new array requires GC
   12.28 +#endif
   12.29 +  assert(check_compiled_frame(thread), "incorrect caller");
   12.30 +
   12.31 +  // Scavenge and allocate an instance.
   12.32 +  oop result;
   12.33 +
   12.34 +  assert(Klass::cast(array_type)->oop_is_typeArray(), "should be called only for type array");
   12.35 +  // The oopFactory likes to work with the element type.
   12.36 +  BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
   12.37 +  result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
   12.38 +
   12.39 +  // Pass oops back through thread local storage.  Our apparent type to Java
   12.40 +  // is that we return an oop, but we can block on exit from this routine and
   12.41 +  // a GC can trash the oop in C's return register.  The generated stub will
   12.42 +  // fetch the oop from TLS after any possible GC.
   12.43 +  deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   12.44 +  thread->set_vm_result(result);
   12.45 +  JRT_BLOCK_END;
   12.46 +
   12.47 +  if (GraphKit::use_ReduceInitialCardMarks()) {
   12.48 +    // inform GC that we won't do card marks for initializing writes.
   12.49 +    new_store_pre_barrier(thread);
   12.50 +  }
   12.51 +JRT_END
   12.52 +
   12.53  // Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
   12.54  
   12.55  // multianewarray for 2 dimensions
    13.1 --- a/src/share/vm/opto/runtime.hpp	Sun Sep 25 16:03:29 2011 -0700
    13.2 +++ b/src/share/vm/opto/runtime.hpp	Mon Sep 26 10:24:05 2011 -0700
    13.3 @@ -114,6 +114,7 @@
    13.4    // References to generated stubs
    13.5    static address _new_instance_Java;
    13.6    static address _new_array_Java;
    13.7 +  static address _new_array_nozero_Java;
    13.8    static address _multianewarray2_Java;
    13.9    static address _multianewarray3_Java;
   13.10    static address _multianewarray4_Java;
   13.11 @@ -143,6 +144,7 @@
   13.12  
    13.13    // Allocate storage for an objArray or typeArray
   13.14    static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
   13.15 +  static void new_array_nozero_C(klassOopDesc* array_klass, int len, JavaThread *thread);
   13.16  
   13.17    // Post-slow-path-allocation, pre-initializing-stores step for
   13.18    // implementing ReduceInitialCardMarks
   13.19 @@ -208,6 +210,7 @@
   13.20    // access to runtime stubs entry points for java code
   13.21    static address new_instance_Java()                     { return _new_instance_Java; }
   13.22    static address new_array_Java()                        { return _new_array_Java; }
   13.23 +  static address new_array_nozero_Java()                 { return _new_array_nozero_Java; }
   13.24    static address multianewarray2_Java()                  { return _multianewarray2_Java; }
   13.25    static address multianewarray3_Java()                  { return _multianewarray3_Java; }
   13.26    static address multianewarray4_Java()                  { return _multianewarray4_Java; }

mercurial