Merge

Thu, 03 Mar 2011 23:31:45 -0800

author
kvn
date
Thu, 03 Mar 2011 23:31:45 -0800
changeset 2607
8c9c9ee30d71
parent 2594
11303bede852
parent 2606
0ac769a57c64
child 2608
3e2b59ab2d07
child 2630
5d8f5a6dced7
child 2631
4cd9add59b1e

Merge

src/share/vm/runtime/arguments.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
     1.2 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
     1.3 @@ -395,9 +395,9 @@
     1.4  
     1.5    int offset = code_offset();
     1.6  
     1.7 -  __ call(Runtime1::entry_for(Runtime1::handle_exception_id), relocInfo::runtime_call_type);
     1.8 +  __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
     1.9    __ delayed()->nop();
    1.10 -  debug_only(__ stop("should have gone to the caller");)
    1.11 +  __ should_not_reach_here();
    1.12    assert(code_offset() - offset <= exception_handler_size, "overflow");
    1.13    __ end_a_stub();
    1.14  
     2.1 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
     2.2 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
     2.3 @@ -148,7 +148,7 @@
     2.4  
     2.5  static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
     2.6    assert(frame_size_in_bytes == __ total_frame_size_in_bytes(reg_save_size_in_words),
     2.7 -         " mismatch in calculation");
     2.8 +         "mismatch in calculation");
     2.9    sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
    2.10    int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
    2.11    OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
    2.12 @@ -176,9 +176,8 @@
    2.13  
    2.14  static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true) {
    2.15    assert(frame_size_in_bytes == __ total_frame_size_in_bytes(reg_save_size_in_words),
    2.16 -         " mismatch in calculation");
    2.17 +         "mismatch in calculation");
    2.18    __ save_frame_c1(frame_size_in_bytes);
    2.19 -  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
    2.20  
    2.21    // Record volatile registers as callee-save values in an OopMap so their save locations will be
    2.22    // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
    2.23 @@ -367,23 +366,7 @@
    2.24    switch (id) {
    2.25      case forward_exception_id:
    2.26        {
    2.27 -        // we're handling an exception in the context of a compiled
    2.28 -        // frame.  The registers have been saved in the standard
    2.29 -        // places.  Perform an exception lookup in the caller and
    2.30 -        // dispatch to the handler if found.  Otherwise unwind and
    2.31 -        // dispatch to the callers exception handler.
    2.32 -
    2.33 -        oop_maps = new OopMapSet();
    2.34 -        OopMap* oop_map = generate_oop_map(sasm, true);
    2.35 -
    2.36 -        // transfer the pending exception to the exception_oop
    2.37 -        __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
    2.38 -        __ ld_ptr(Oexception, 0, G0);
    2.39 -        __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));
    2.40 -        __ add(I7, frame::pc_return_offset, Oissuing_pc);
    2.41 -
    2.42 -        generate_handle_exception(sasm, oop_maps, oop_map);
    2.43 -        __ should_not_reach_here();
    2.44 +        oop_maps = generate_handle_exception(id, sasm);
    2.45        }
    2.46        break;
    2.47  
    2.48 @@ -671,15 +654,14 @@
    2.49        break;
    2.50  
    2.51      case handle_exception_id:
    2.52 -      {
    2.53 -        __ set_info("handle_exception", dont_gc_arguments);
    2.54 -        // make a frame and preserve the caller's caller-save registers
    2.55 +      { __ set_info("handle_exception", dont_gc_arguments);
    2.56 +        oop_maps = generate_handle_exception(id, sasm);
    2.57 +      }
    2.58 +      break;
    2.59  
    2.60 -        oop_maps = new OopMapSet();
    2.61 -        OopMap* oop_map = save_live_registers(sasm);
    2.62 -        __ mov(Oexception->after_save(),  Oexception);
    2.63 -        __ mov(Oissuing_pc->after_save(), Oissuing_pc);
    2.64 -        generate_handle_exception(sasm, oop_maps, oop_map);
    2.65 +    case handle_exception_from_callee_id:
    2.66 +      { __ set_info("handle_exception_from_callee", dont_gc_arguments);
    2.67 +        oop_maps = generate_handle_exception(id, sasm);
    2.68        }
    2.69        break;
    2.70  
    2.71 @@ -696,7 +678,7 @@
    2.72                          G2_thread, Oissuing_pc->after_save());
    2.73          __ verify_not_null_oop(Oexception->after_save());
    2.74  
    2.75 -        // Restore SP from L7 if the exception PC is a MethodHandle call site.
    2.76 +        // Restore SP from L7 if the exception PC is a method handle call site.
    2.77          __ mov(O0, G5);  // Save the target address.
    2.78          __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
    2.79          __ tst(L0);  // Condition codes are preserved over the restore.
    2.80 @@ -1006,48 +988,89 @@
    2.81  }
    2.82  
    2.83  
    2.84 -void Runtime1::generate_handle_exception(StubAssembler* sasm, OopMapSet* oop_maps, OopMap* oop_map, bool) {
    2.85 -  Label no_deopt;
    2.86 +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) {
    2.87 +  __ block_comment("generate_handle_exception");
    2.88 +
    2.89 +  // Save registers, if required.
    2.90 +  OopMapSet* oop_maps = new OopMapSet();
    2.91 +  OopMap* oop_map = NULL;
    2.92 +  switch (id) {
    2.93 +  case forward_exception_id:
    2.94 +    // We're handling an exception in the context of a compiled frame.
    2.95 +    // The registers have been saved in the standard places.  Perform
    2.96 +    // an exception lookup in the caller and dispatch to the handler
    2.97 +    // if found.  Otherwise unwind and dispatch to the callers
    2.98 +    // exception handler.
    2.99 +     oop_map = generate_oop_map(sasm, true);
   2.100 +
   2.101 +     // transfer the pending exception to the exception_oop
   2.102 +     __ ld_ptr(G2_thread, in_bytes(JavaThread::pending_exception_offset()), Oexception);
   2.103 +     __ ld_ptr(Oexception, 0, G0);
   2.104 +     __ st_ptr(G0, G2_thread, in_bytes(JavaThread::pending_exception_offset()));
   2.105 +     __ add(I7, frame::pc_return_offset, Oissuing_pc);
   2.106 +    break;
   2.107 +  case handle_exception_id:
   2.108 +    // At this point all registers MAY be live.
   2.109 +    oop_map = save_live_registers(sasm);
   2.110 +    __ mov(Oexception->after_save(),  Oexception);
   2.111 +    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
   2.112 +    break;
   2.113 +  case handle_exception_from_callee_id:
   2.114 +    // At this point all registers except exception oop (Oexception)
   2.115 +    // and exception pc (Oissuing_pc) are dead.
   2.116 +    oop_map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
   2.117 +    sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
   2.118 +    __ save_frame_c1(frame_size_in_bytes);
   2.119 +    __ mov(Oexception->after_save(),  Oexception);
   2.120 +    __ mov(Oissuing_pc->after_save(), Oissuing_pc);
   2.121 +    break;
   2.122 +  default:  ShouldNotReachHere();
   2.123 +  }
   2.124  
   2.125    __ verify_not_null_oop(Oexception);
   2.126  
   2.127    // save the exception and issuing pc in the thread
   2.128 -  __ st_ptr(Oexception, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
   2.129 +  __ st_ptr(Oexception,  G2_thread, in_bytes(JavaThread::exception_oop_offset()));
   2.130    __ st_ptr(Oissuing_pc, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
   2.131  
   2.132 -  // save the real return address and use the throwing pc as the return address to lookup (has bci & oop map)
   2.133 -  __ mov(I7, L0);
   2.134 +  // use the throwing pc as the return address to lookup (has bci & oop map)
   2.135    __ mov(Oissuing_pc, I7);
   2.136    __ sub(I7, frame::pc_return_offset, I7);
   2.137    int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
   2.138 +  oop_maps->add_gc_map(call_offset, oop_map);
   2.139  
   2.140    // Note: if nmethod has been deoptimized then regardless of
   2.141    // whether it had a handler or not we will deoptimize
   2.142    // by entering the deopt blob with a pending exception.
   2.143  
   2.144 -#ifdef ASSERT
   2.145 -  Label done;
   2.146 -  __ tst(O0);
   2.147 -  __ br(Assembler::notZero, false, Assembler::pn, done);
   2.148 -  __ delayed()->nop();
   2.149 -  __ stop("should have found address");
   2.150 -  __ bind(done);
   2.151 -#endif
   2.152 +  // Restore the registers that were saved at the beginning, remove
   2.153 +  // the frame and jump to the exception handler.
   2.154 +  switch (id) {
   2.155 +  case forward_exception_id:
   2.156 +  case handle_exception_id:
   2.157 +    restore_live_registers(sasm);
   2.158 +    __ jmp(O0, 0);
   2.159 +    __ delayed()->restore();
   2.160 +    break;
   2.161 +  case handle_exception_from_callee_id:
   2.162 +    // Restore SP from L7 if the exception PC is a method handle call site.
   2.163 +    __ mov(O0, G5);  // Save the target address.
   2.164 +    __ lduw(Address(G2_thread, JavaThread::is_method_handle_return_offset()), L0);
   2.165 +    __ tst(L0);  // Condition codes are preserved over the restore.
   2.166 +    __ restore();
   2.167  
   2.168 -  // restore the registers that were saved at the beginning and jump to the exception handler.
   2.169 -  restore_live_registers(sasm);
   2.170 +    __ jmp(G5, 0);  // jump to the exception handler
   2.171 +    __ delayed()->movcc(Assembler::notZero, false, Assembler::icc, L7_mh_SP_save, SP);  // Restore SP if required.
   2.172 +    break;
   2.173 +  default:  ShouldNotReachHere();
   2.174 +  }
   2.175  
   2.176 -  __ jmp(O0, 0);
   2.177 -  __ delayed()->restore();
   2.178 -
   2.179 -  oop_maps->add_gc_map(call_offset, oop_map);
   2.180 +  return oop_maps;
   2.181  }
   2.182  
   2.183  
   2.184  #undef __
   2.185  
   2.186 -#define __ masm->
   2.187 -
   2.188  const char *Runtime1::pd_name_for_address(address entry) {
   2.189    return "<unknown function>";
   2.190  }
     3.1 --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
     3.2 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
     3.3 @@ -417,6 +417,7 @@
     3.4  
     3.5    // Some handy addresses:
     3.6    Address G5_method_fie(    G5_method,        in_bytes(methodOopDesc::from_interpreted_offset()));
     3.7 +  Address G5_method_fce(    G5_method,        in_bytes(methodOopDesc::from_compiled_offset()));
     3.8  
     3.9    Address G3_mh_vmtarget(   G3_method_handle, java_dyn_MethodHandle::vmtarget_offset_in_bytes());
    3.10  
    3.11 @@ -444,12 +445,10 @@
    3.12    case _raise_exception:
    3.13      {
    3.14        // Not a real MH entry, but rather shared code for raising an
    3.15 -      // exception.  Since we use a C2I adapter to set up the
    3.16 -      // interpreter state, arguments are expected in compiler
    3.17 -      // argument registers.
    3.18 +      // exception.  Since we use the compiled entry, arguments are
    3.19 +      // expected in compiler argument registers.
    3.20        assert(raise_exception_method(), "must be set");
    3.21 -      address c2i_entry = raise_exception_method()->get_c2i_entry();
    3.22 -      assert(c2i_entry, "method must be linked");
    3.23 +      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
    3.24  
    3.25        __ mov(O5_savedSP, SP);  // Cut the stack back to where the caller started.
    3.26  
    3.27 @@ -468,10 +467,9 @@
    3.28        __ delayed()->nop();
    3.29  
    3.30        __ verify_oop(G5_method);
    3.31 -      __ jump_to(AddressLiteral(c2i_entry), O3_scratch);
    3.32 +      __ jump_indirect_to(G5_method_fce, O3_scratch);  // jump to compiled entry
    3.33        __ delayed()->nop();
    3.34  
    3.35 -      // If we get here, the Java runtime did not do its job of creating the exception.
     3.36        // Do something that at least causes a valid throw from the interpreter.
    3.37        __ bind(L_no_method);
    3.38        __ unimplemented("call throw_WrongMethodType_entry");
     4.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 03 21:02:56 2011 -0800
     4.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 03 23:31:45 2011 -0800
     4.3 @@ -1,5 +1,5 @@
     4.4  /*
     4.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
     4.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
     4.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4.8   *
     4.9   * This code is free software; you can redistribute it and/or modify it
    4.10 @@ -968,19 +968,6 @@
    4.11      return start;
    4.12    }
    4.13  
    4.14 -  static address disjoint_byte_copy_entry;
    4.15 -  static address disjoint_short_copy_entry;
    4.16 -  static address disjoint_int_copy_entry;
    4.17 -  static address disjoint_long_copy_entry;
    4.18 -  static address disjoint_oop_copy_entry;
    4.19 -
    4.20 -  static address byte_copy_entry;
    4.21 -  static address short_copy_entry;
    4.22 -  static address int_copy_entry;
    4.23 -  static address long_copy_entry;
    4.24 -  static address oop_copy_entry;
    4.25 -
    4.26 -  static address checkcast_copy_entry;
    4.27  
    4.28    //
    4.29    // Verify that a register contains clean 32-bits positive value
    4.30 @@ -1046,31 +1033,40 @@
    4.31    //
    4.32    //  The input registers are overwritten.
    4.33    //
    4.34 -  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
    4.35 +  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    4.36      BarrierSet* bs = Universe::heap()->barrier_set();
    4.37 -    if (bs->has_write_ref_pre_barrier()) {
    4.38 -      assert(bs->has_write_ref_array_pre_opt(),
    4.39 -             "Else unsupported barrier set.");
    4.40 -
    4.41 -      __ save_frame(0);
    4.42 -      // Save the necessary global regs... will be used after.
    4.43 -      if (addr->is_global()) {
    4.44 -        __ mov(addr, L0);
    4.45 -      }
    4.46 -      if (count->is_global()) {
    4.47 -        __ mov(count, L1);
    4.48 -      }
    4.49 -      __ mov(addr->after_save(), O0);
    4.50 -      // Get the count into O1
    4.51 -      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
    4.52 -      __ delayed()->mov(count->after_save(), O1);
    4.53 -      if (addr->is_global()) {
    4.54 -        __ mov(L0, addr);
    4.55 -      }
    4.56 -      if (count->is_global()) {
    4.57 -        __ mov(L1, count);
    4.58 -      }
    4.59 -      __ restore();
    4.60 +    switch (bs->kind()) {
    4.61 +      case BarrierSet::G1SATBCT:
    4.62 +      case BarrierSet::G1SATBCTLogging:
     4.63 +        // With G1, don't generate the call if we statically know that the target is uninitialized
    4.64 +        if (!dest_uninitialized) {
    4.65 +          __ save_frame(0);
    4.66 +          // Save the necessary global regs... will be used after.
    4.67 +          if (addr->is_global()) {
    4.68 +            __ mov(addr, L0);
    4.69 +          }
    4.70 +          if (count->is_global()) {
    4.71 +            __ mov(count, L1);
    4.72 +          }
    4.73 +          __ mov(addr->after_save(), O0);
    4.74 +          // Get the count into O1
    4.75 +          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
    4.76 +          __ delayed()->mov(count->after_save(), O1);
    4.77 +          if (addr->is_global()) {
    4.78 +            __ mov(L0, addr);
    4.79 +          }
    4.80 +          if (count->is_global()) {
    4.81 +            __ mov(L1, count);
    4.82 +          }
    4.83 +          __ restore();
    4.84 +        }
    4.85 +        break;
    4.86 +      case BarrierSet::CardTableModRef:
    4.87 +      case BarrierSet::CardTableExtension:
    4.88 +      case BarrierSet::ModRef:
    4.89 +        break;
    4.90 +      default:
    4.91 +        ShouldNotReachHere();
    4.92      }
    4.93    }
    4.94    //
    4.95 @@ -1084,7 +1080,7 @@
    4.96    //  The input registers are overwritten.
    4.97    //
    4.98    void gen_write_ref_array_post_barrier(Register addr, Register count,
    4.99 -                                   Register tmp) {
   4.100 +                                        Register tmp) {
   4.101      BarrierSet* bs = Universe::heap()->barrier_set();
   4.102  
   4.103      switch (bs->kind()) {
   4.104 @@ -1283,7 +1279,7 @@
   4.105    //      to:    O1
   4.106    //      count: O2 treated as signed
   4.107    //
   4.108 -  address generate_disjoint_byte_copy(bool aligned, const char * name) {
   4.109 +  address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
   4.110      __ align(CodeEntryAlignment);
   4.111      StubCodeMark mark(this, "StubRoutines", name);
   4.112      address start = __ pc();
   4.113 @@ -1299,9 +1295,11 @@
   4.114  
   4.115      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.116  
   4.117 -    if (!aligned)  disjoint_byte_copy_entry = __ pc();
   4.118 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.119 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.120 +    if (entry != NULL) {
   4.121 +      *entry = __ pc();
   4.122 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.123 +      BLOCK_COMMENT("Entry:");
   4.124 +    }
   4.125  
   4.126      // for short arrays, just do single element copy
   4.127      __ cmp(count, 23); // 16 + 7
   4.128 @@ -1391,15 +1389,13 @@
   4.129    //      to:    O1
   4.130    //      count: O2 treated as signed
   4.131    //
   4.132 -  address generate_conjoint_byte_copy(bool aligned, const char * name) {
   4.133 +  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
   4.134 +                                      address *entry, const char *name) {
   4.135      // Do reverse copy.
   4.136  
   4.137      __ align(CodeEntryAlignment);
   4.138      StubCodeMark mark(this, "StubRoutines", name);
   4.139      address start = __ pc();
   4.140 -    address nooverlap_target = aligned ?
   4.141 -        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
   4.142 -        disjoint_byte_copy_entry;
   4.143  
   4.144      Label L_skip_alignment, L_align, L_aligned_copy;
   4.145      Label L_copy_byte, L_copy_byte_loop, L_exit;
   4.146 @@ -1412,9 +1408,11 @@
   4.147  
   4.148      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.149  
   4.150 -    if (!aligned)  byte_copy_entry = __ pc();
   4.151 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.152 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.153 +    if (entry != NULL) {
   4.154 +      *entry = __ pc();
   4.155 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.156 +      BLOCK_COMMENT("Entry:");
   4.157 +    }
   4.158  
   4.159      array_overlap_test(nooverlap_target, 0);
   4.160  
   4.161 @@ -1504,7 +1502,7 @@
   4.162    //      to:    O1
   4.163    //      count: O2 treated as signed
   4.164    //
   4.165 -  address generate_disjoint_short_copy(bool aligned, const char * name) {
   4.166 +  address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
   4.167      __ align(CodeEntryAlignment);
   4.168      StubCodeMark mark(this, "StubRoutines", name);
   4.169      address start = __ pc();
   4.170 @@ -1520,9 +1518,11 @@
   4.171  
   4.172      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.173  
   4.174 -    if (!aligned)  disjoint_short_copy_entry = __ pc();
   4.175 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.176 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.177 +    if (entry != NULL) {
   4.178 +      *entry = __ pc();
   4.179 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.180 +      BLOCK_COMMENT("Entry:");
   4.181 +    }
   4.182  
   4.183      // for short arrays, just do single element copy
   4.184      __ cmp(count, 11); // 8 + 3  (22 bytes)
   4.185 @@ -1842,15 +1842,13 @@
   4.186    //      to:    O1
   4.187    //      count: O2 treated as signed
   4.188    //
   4.189 -  address generate_conjoint_short_copy(bool aligned, const char * name) {
   4.190 +  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
   4.191 +                                       address *entry, const char *name) {
   4.192      // Do reverse copy.
   4.193  
   4.194      __ align(CodeEntryAlignment);
   4.195      StubCodeMark mark(this, "StubRoutines", name);
   4.196      address start = __ pc();
   4.197 -    address nooverlap_target = aligned ?
   4.198 -        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
   4.199 -        disjoint_short_copy_entry;
   4.200  
   4.201      Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
   4.202      Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
   4.203 @@ -1865,9 +1863,11 @@
   4.204  
   4.205      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.206  
   4.207 -    if (!aligned)  short_copy_entry = __ pc();
   4.208 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.209 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.210 +    if (entry != NULL) {
   4.211 +      *entry = __ pc();
   4.212 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.213 +      BLOCK_COMMENT("Entry:");
   4.214 +    }
   4.215  
   4.216      array_overlap_test(nooverlap_target, 1);
   4.217  
   4.218 @@ -2072,7 +2072,7 @@
   4.219    //      to:    O1
   4.220    //      count: O2 treated as signed
   4.221    //
   4.222 -  address generate_disjoint_int_copy(bool aligned, const char * name) {
   4.223 +  address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
   4.224      __ align(CodeEntryAlignment);
   4.225      StubCodeMark mark(this, "StubRoutines", name);
   4.226      address start = __ pc();
   4.227 @@ -2080,9 +2080,11 @@
   4.228      const Register count = O2;
   4.229      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.230  
   4.231 -    if (!aligned)  disjoint_int_copy_entry = __ pc();
   4.232 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.233 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.234 +    if (entry != NULL) {
   4.235 +      *entry = __ pc();
   4.236 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.237 +      BLOCK_COMMENT("Entry:");
   4.238 +    }
   4.239  
   4.240      generate_disjoint_int_copy_core(aligned);
   4.241  
   4.242 @@ -2204,20 +2206,19 @@
   4.243    //      to:    O1
   4.244    //      count: O2 treated as signed
   4.245    //
   4.246 -  address generate_conjoint_int_copy(bool aligned, const char * name) {
   4.247 +  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
   4.248 +                                     address *entry, const char *name) {
   4.249      __ align(CodeEntryAlignment);
   4.250      StubCodeMark mark(this, "StubRoutines", name);
   4.251      address start = __ pc();
   4.252  
   4.253 -    address nooverlap_target = aligned ?
   4.254 -        StubRoutines::arrayof_jint_disjoint_arraycopy() :
   4.255 -        disjoint_int_copy_entry;
   4.256 -
   4.257      assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
   4.258  
   4.259 -    if (!aligned)  int_copy_entry = __ pc();
   4.260 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.261 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.262 +    if (entry != NULL) {
   4.263 +      *entry = __ pc();
   4.264 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.265 +      BLOCK_COMMENT("Entry:");
   4.266 +    }
   4.267  
   4.268      array_overlap_test(nooverlap_target, 2);
   4.269  
   4.270 @@ -2336,16 +2337,18 @@
   4.271    //      to:    O1
   4.272    //      count: O2 treated as signed
   4.273    //
   4.274 -  address generate_disjoint_long_copy(bool aligned, const char * name) {
   4.275 +  address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
   4.276      __ align(CodeEntryAlignment);
   4.277      StubCodeMark mark(this, "StubRoutines", name);
   4.278      address start = __ pc();
   4.279  
   4.280      assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
   4.281  
   4.282 -    if (!aligned)  disjoint_long_copy_entry = __ pc();
   4.283 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.284 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.285 +    if (entry != NULL) {
   4.286 +      *entry = __ pc();
   4.287 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.288 +      BLOCK_COMMENT("Entry:");
   4.289 +    }
   4.290  
   4.291      generate_disjoint_long_copy_core(aligned);
   4.292  
   4.293 @@ -2406,19 +2409,21 @@
   4.294    //      to:    O1
   4.295    //      count: O2 treated as signed
   4.296    //
   4.297 -  address generate_conjoint_long_copy(bool aligned, const char * name) {
   4.298 +  address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
   4.299 +                                      address *entry, const char *name) {
   4.300      __ align(CodeEntryAlignment);
   4.301      StubCodeMark mark(this, "StubRoutines", name);
   4.302      address start = __ pc();
   4.303  
   4.304 -    assert(!aligned, "usage");
   4.305 -    address nooverlap_target = disjoint_long_copy_entry;
   4.306 +    assert(aligned, "Should always be aligned");
   4.307  
   4.308      assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
   4.309  
   4.310 -    if (!aligned)  long_copy_entry = __ pc();
   4.311 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.312 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.313 +    if (entry != NULL) {
   4.314 +      *entry = __ pc();
   4.315 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   4.316 +      BLOCK_COMMENT("Entry:");
   4.317 +    }
   4.318  
   4.319      array_overlap_test(nooverlap_target, 3);
   4.320  
   4.321 @@ -2439,7 +2444,8 @@
   4.322    //      to:    O1
   4.323    //      count: O2 treated as signed
   4.324    //
   4.325 -  address generate_disjoint_oop_copy(bool aligned, const char * name) {
   4.326 +  address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
   4.327 +                                     bool dest_uninitialized = false) {
   4.328  
   4.329      const Register from  = O0;  // source array address
   4.330      const Register to    = O1;  // destination array address
   4.331 @@ -2451,14 +2457,16 @@
   4.332  
   4.333      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.334  
   4.335 -    if (!aligned)  disjoint_oop_copy_entry = __ pc();
   4.336 -    // caller can pass a 64-bit byte count here
   4.337 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.338 +    if (entry != NULL) {
   4.339 +      *entry = __ pc();
   4.340 +      // caller can pass a 64-bit byte count here
   4.341 +      BLOCK_COMMENT("Entry:");
   4.342 +    }
   4.343  
   4.344      // save arguments for barrier generation
   4.345      __ mov(to, G1);
   4.346      __ mov(count, G5);
   4.347 -    gen_write_ref_array_pre_barrier(G1, G5);
   4.348 +    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
   4.349    #ifdef _LP64
   4.350      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.351      if (UseCompressedOops) {
   4.352 @@ -2487,7 +2495,9 @@
   4.353    //      to:    O1
   4.354    //      count: O2 treated as signed
   4.355    //
   4.356 -  address generate_conjoint_oop_copy(bool aligned, const char * name) {
   4.357 +  address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
   4.358 +                                     address *entry, const char *name,
   4.359 +                                     bool dest_uninitialized = false) {
   4.360  
   4.361      const Register from  = O0;  // source array address
   4.362      const Register to    = O1;  // destination array address
   4.363 @@ -2499,21 +2509,18 @@
   4.364  
   4.365      assert_clean_int(count, O3);     // Make sure 'count' is clean int.
   4.366  
   4.367 -    if (!aligned)  oop_copy_entry = __ pc();
   4.368 -    // caller can pass a 64-bit byte count here
   4.369 -    if (!aligned)  BLOCK_COMMENT("Entry:");
   4.370 +    if (entry != NULL) {
   4.371 +      *entry = __ pc();
   4.372 +      // caller can pass a 64-bit byte count here
   4.373 +      BLOCK_COMMENT("Entry:");
   4.374 +    }
   4.375 +
   4.376 +    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
   4.377  
   4.378      // save arguments for barrier generation
   4.379      __ mov(to, G1);
   4.380      __ mov(count, G5);
   4.381 -
   4.382 -    gen_write_ref_array_pre_barrier(G1, G5);
   4.383 -
   4.384 -    address nooverlap_target = aligned ?
   4.385 -        StubRoutines::arrayof_oop_disjoint_arraycopy() :
   4.386 -        disjoint_oop_copy_entry;
   4.387 -
   4.388 -    array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
   4.389 +    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
   4.390  
   4.391    #ifdef _LP64
   4.392      if (UseCompressedOops) {
   4.393 @@ -2582,7 +2589,7 @@
   4.394    //      ckval: O4 (super_klass)
   4.395    //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
   4.396    //
   4.397 -  address generate_checkcast_copy(const char* name) {
   4.398 +  address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
   4.399  
   4.400      const Register O0_from   = O0;      // source array address
   4.401      const Register O1_to     = O1;      // destination array address
   4.402 @@ -2600,8 +2607,6 @@
   4.403      StubCodeMark mark(this, "StubRoutines", name);
   4.404      address start = __ pc();
   4.405  
   4.406 -    gen_write_ref_array_pre_barrier(O1, O2);
   4.407 -
   4.408  #ifdef ASSERT
   4.409      // We sometimes save a frame (see generate_type_check below).
   4.410      // If this will cause trouble, let's fail now instead of later.
   4.411 @@ -2625,9 +2630,12 @@
   4.412      }
   4.413  #endif //ASSERT
   4.414  
   4.415 -    checkcast_copy_entry = __ pc();
   4.416 -    // caller can pass a 64-bit byte count here (from generic stub)
   4.417 -    BLOCK_COMMENT("Entry:");
   4.418 +    if (entry != NULL) {
   4.419 +      *entry = __ pc();
   4.420 +      // caller can pass a 64-bit byte count here (from generic stub)
   4.421 +      BLOCK_COMMENT("Entry:");
   4.422 +    }
   4.423 +    gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);
   4.424  
   4.425      Label load_element, store_element, do_card_marks, fail, done;
   4.426      __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
   4.427 @@ -2700,7 +2708,11 @@
   4.428    // Examines the alignment of the operands and dispatches
   4.429    // to a long, int, short, or byte copy loop.
   4.430    //
   4.431 -  address generate_unsafe_copy(const char* name) {
   4.432 +  address generate_unsafe_copy(const char* name,
   4.433 +                               address byte_copy_entry,
   4.434 +                               address short_copy_entry,
   4.435 +                               address int_copy_entry,
   4.436 +                               address long_copy_entry) {
   4.437  
   4.438      const Register O0_from   = O0;      // source array address
   4.439      const Register O1_to     = O1;      // destination array address
   4.440 @@ -2796,8 +2808,13 @@
   4.441    //    O0 ==  0  -  success
   4.442    //    O0 == -1  -  need to call System.arraycopy
   4.443    //
   4.444 -  address generate_generic_copy(const char *name) {
   4.445 -
   4.446 +  address generate_generic_copy(const char *name,
   4.447 +                                address entry_jbyte_arraycopy,
   4.448 +                                address entry_jshort_arraycopy,
   4.449 +                                address entry_jint_arraycopy,
   4.450 +                                address entry_oop_arraycopy,
   4.451 +                                address entry_jlong_arraycopy,
   4.452 +                                address entry_checkcast_arraycopy) {
   4.453      Label L_failed, L_objArray;
   4.454  
   4.455      // Input registers
   4.456 @@ -2970,15 +2987,15 @@
   4.457  
   4.458      BLOCK_COMMENT("choose copy loop based on element size");
   4.459      __ cmp(G3_elsize, 0);
   4.460 -    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jbyte_arraycopy);
   4.461 +    __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
   4.462      __ delayed()->signx(length, count); // length
   4.463  
   4.464      __ cmp(G3_elsize, LogBytesPerShort);
   4.465 -    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jshort_arraycopy);
   4.466 +    __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
   4.467      __ delayed()->signx(length, count); // length
   4.468  
   4.469      __ cmp(G3_elsize, LogBytesPerInt);
   4.470 -    __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jint_arraycopy);
   4.471 +    __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
   4.472      __ delayed()->signx(length, count); // length
   4.473  #ifdef ASSERT
   4.474      { Label L;
   4.475 @@ -2989,7 +3006,7 @@
   4.476        __ bind(L);
   4.477      }
   4.478  #endif
   4.479 -    __ br(Assembler::always,false,Assembler::pt,StubRoutines::_jlong_arraycopy);
   4.480 +    __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
   4.481      __ delayed()->signx(length, count); // length
   4.482  
   4.483      // objArrayKlass
   4.484 @@ -3013,7 +3030,7 @@
   4.485      __ add(src, src_pos, from);       // src_addr
   4.486      __ add(dst, dst_pos, to);         // dst_addr
   4.487    __ BIND(L_plain_copy);
   4.488 -    __ br(Assembler::always, false, Assembler::pt,StubRoutines::_oop_arraycopy);
   4.489 +    __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy);
   4.490      __ delayed()->signx(length, count); // length
   4.491  
   4.492    __ BIND(L_checkcast_copy);
   4.493 @@ -3057,7 +3074,7 @@
   4.494        __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
   4.495        // lduw(O4, sco_offset, O3);              // sco of elem klass
   4.496  
   4.497 -      __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
   4.498 +      __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy);
   4.499        __ delayed()->lduw(O4, sco_offset, O3);
   4.500      }
   4.501  
   4.502 @@ -3068,39 +3085,124 @@
   4.503    }
   4.504  
   4.505    void generate_arraycopy_stubs() {
   4.506 -
   4.507 -    // Note:  the disjoint stubs must be generated first, some of
   4.508 -    //        the conjoint stubs use them.
   4.509 -    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
   4.510 -    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
   4.511 -    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
   4.512 -    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
   4.513 -    StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
   4.514 -    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
   4.515 -    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
   4.516 -    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
   4.517 -    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
   4.518 -    StubRoutines::_arrayof_oop_disjoint_arraycopy    =  generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
   4.519 -
   4.520 -    StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
   4.521 -    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
   4.522 -    StubRoutines::_jint_arraycopy   = generate_conjoint_int_copy(false, "jint_arraycopy");
   4.523 -    StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
   4.524 -    StubRoutines::_oop_arraycopy    = generate_conjoint_oop_copy(false, "oop_arraycopy");
   4.525 -    StubRoutines::_arrayof_jbyte_arraycopy    = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
   4.526 -    StubRoutines::_arrayof_jshort_arraycopy   = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
   4.527 +    address entry;
   4.528 +    address entry_jbyte_arraycopy;
   4.529 +    address entry_jshort_arraycopy;
   4.530 +    address entry_jint_arraycopy;
   4.531 +    address entry_oop_arraycopy;
   4.532 +    address entry_jlong_arraycopy;
   4.533 +    address entry_checkcast_arraycopy;
   4.534 +
   4.535 +    //*** jbyte
   4.536 +    // Always need aligned and unaligned versions
   4.537 +    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
   4.538 +                                                                                  "jbyte_disjoint_arraycopy");
   4.539 +    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
   4.540 +                                                                                  &entry_jbyte_arraycopy,
   4.541 +                                                                                  "jbyte_arraycopy");
   4.542 +    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
   4.543 +                                                                                  "arrayof_jbyte_disjoint_arraycopy");
   4.544 +    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
   4.545 +                                                                                  "arrayof_jbyte_arraycopy");
   4.546 +
   4.547 +    //*** jshort
   4.548 +    // Always need aligned and unaligned versions
   4.549 +    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
   4.550 +                                                                                    "jshort_disjoint_arraycopy");
   4.551 +    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
   4.552 +                                                                                    &entry_jshort_arraycopy,
   4.553 +                                                                                    "jshort_arraycopy");
   4.554 +    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
   4.555 +                                                                                    "arrayof_jshort_disjoint_arraycopy");
   4.556 +    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
   4.557 +                                                                                    "arrayof_jshort_arraycopy");
   4.558 +
   4.559 +    //*** jint
   4.560 +    // Aligned versions
   4.561 +    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
   4.562 +                                                                                "arrayof_jint_disjoint_arraycopy");
   4.563 +    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
   4.564 +                                                                                "arrayof_jint_arraycopy");
   4.565  #ifdef _LP64
   4.566 -    // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
   4.567 -    StubRoutines::_arrayof_jint_arraycopy     = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
   4.568 -  #else
   4.569 -    StubRoutines::_arrayof_jint_arraycopy     = StubRoutines::_jint_arraycopy;
   4.570 +    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
   4.571 +    // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
   4.572 +    StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
   4.573 +                                                                                "jint_disjoint_arraycopy");
   4.574 +    StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
   4.575 +                                                                                &entry_jint_arraycopy,
   4.576 +                                                                                "jint_arraycopy");
   4.577 +#else
   4.578 +    // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
   4.579 +    // (in fact in 32bit we always have a pre-loop part even in the aligned version,
   4.580 +    //  because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
   4.581 +    StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
   4.582 +    StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
   4.583  #endif
   4.584 -    StubRoutines::_arrayof_jlong_arraycopy    = StubRoutines::_jlong_arraycopy;
   4.585 -    StubRoutines::_arrayof_oop_arraycopy      = StubRoutines::_oop_arraycopy;
   4.586 -
   4.587 -    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
   4.588 -    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
   4.589 -    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
   4.590 +
   4.591 +
   4.592 +    //*** jlong
   4.593 +    // It is always aligned
   4.594 +    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
   4.595 +                                                                                  "arrayof_jlong_disjoint_arraycopy");
   4.596 +    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
   4.597 +                                                                                  "arrayof_jlong_arraycopy");
   4.598 +    StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
   4.599 +    StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
   4.600 +
   4.601 +
   4.602 +    //*** oops
   4.603 +    // Aligned versions
   4.604 +    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, &entry,
   4.605 +                                                                                      "arrayof_oop_disjoint_arraycopy");
   4.606 +    StubRoutines::_arrayof_oop_arraycopy                 = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
   4.607 +                                                                                      "arrayof_oop_arraycopy");
   4.608 +    // Aligned versions without pre-barriers
   4.609 +    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
   4.610 +                                                                                      "arrayof_oop_disjoint_arraycopy_uninit",
   4.611 +                                                                                      /*dest_uninitialized*/true);
   4.612 +    StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
   4.613 +                                                                                      "arrayof_oop_arraycopy_uninit",
   4.614 +                                                                                      /*dest_uninitialized*/true);
   4.615 +#ifdef _LP64
   4.616 +    if (UseCompressedOops) {
   4.617 +      // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
   4.618 +      StubRoutines::_oop_disjoint_arraycopy            = generate_disjoint_oop_copy(false, &entry,
   4.619 +                                                                                    "oop_disjoint_arraycopy");
   4.620 +      StubRoutines::_oop_arraycopy                     = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
   4.621 +                                                                                    "oop_arraycopy");
   4.622 +      // Unaligned versions without pre-barriers
   4.623 +      StubRoutines::_oop_disjoint_arraycopy_uninit     = generate_disjoint_oop_copy(false, &entry,
   4.624 +                                                                                    "oop_disjoint_arraycopy_uninit",
   4.625 +                                                                                    /*dest_uninitialized*/true);
   4.626 +      StubRoutines::_oop_arraycopy_uninit              = generate_conjoint_oop_copy(false, entry, NULL,
   4.627 +                                                                                    "oop_arraycopy_uninit",
   4.628 +                                                                                    /*dest_uninitialized*/true);
   4.629 +    } else
   4.630 +#endif
   4.631 +    {
   4.632 +      // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
   4.633 +      StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
   4.634 +      StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
   4.635 +      StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
   4.636 +      StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
   4.637 +    }
   4.638 +
   4.639 +    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
   4.640 +    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
   4.641 +                                                                        /*dest_uninitialized*/true);
   4.642 +
   4.643 +    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
   4.644 +                                                              entry_jbyte_arraycopy,
   4.645 +                                                              entry_jshort_arraycopy,
   4.646 +                                                              entry_jint_arraycopy,
   4.647 +                                                              entry_jlong_arraycopy);
   4.648 +    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
   4.649 +                                                               entry_jbyte_arraycopy,
   4.650 +                                                               entry_jshort_arraycopy,
   4.651 +                                                               entry_jint_arraycopy,
   4.652 +                                                               entry_oop_arraycopy,
   4.653 +                                                               entry_jlong_arraycopy,
   4.654 +                                                               entry_checkcast_arraycopy);
   4.655  
   4.656      StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
   4.657      StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
   4.658 @@ -3224,21 +3326,6 @@
   4.659  
   4.660  }; // end class declaration
   4.661  
   4.662 -
   4.663 -address StubGenerator::disjoint_byte_copy_entry  = NULL;
   4.664 -address StubGenerator::disjoint_short_copy_entry = NULL;
   4.665 -address StubGenerator::disjoint_int_copy_entry   = NULL;
   4.666 -address StubGenerator::disjoint_long_copy_entry  = NULL;
   4.667 -address StubGenerator::disjoint_oop_copy_entry   = NULL;
   4.668 -
   4.669 -address StubGenerator::byte_copy_entry  = NULL;
   4.670 -address StubGenerator::short_copy_entry = NULL;
   4.671 -address StubGenerator::int_copy_entry   = NULL;
   4.672 -address StubGenerator::long_copy_entry  = NULL;
   4.673 -address StubGenerator::oop_copy_entry   = NULL;
   4.674 -
   4.675 -address StubGenerator::checkcast_copy_entry = NULL;
   4.676 -
   4.677  void StubGenerator_generate(CodeBuffer* code, bool all) {
   4.678    StubGenerator g(code, all);
   4.679  }
     5.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
     5.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
     5.3 @@ -1601,6 +1601,17 @@
     5.4    emit_byte(0xC0 | encode);
     5.5  }
     5.6  
     5.7 +void Assembler::movdl(XMMRegister dst, Address src) {
     5.8 +  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
     5.9 +  InstructionMark im(this);
    5.10 +  emit_byte(0x66);
    5.11 +  prefix(src, dst);
    5.12 +  emit_byte(0x0F);
    5.13 +  emit_byte(0x6E);
    5.14 +  emit_operand(dst, src);
    5.15 +}
    5.16 +
    5.17 +
    5.18  void Assembler::movdqa(XMMRegister dst, Address src) {
    5.19    NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    5.20    InstructionMark im(this);
    5.21 @@ -2412,7 +2423,10 @@
    5.22  }
    5.23  
    5.24  void Assembler::psrlq(XMMRegister dst, int shift) {
    5.25 -  // HMM Table D-1 says sse2 or mmx
    5.26 +  // Shift 64 bit value logically right by specified number of bits.
    5.27 +  // HMM Table D-1 says sse2 or mmx.
    5.28 +  // Do not confuse it with psrldq SSE2 instruction which
    5.29 +  // shifts 128 bit value in xmm register by number of bytes.
    5.30    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    5.31  
    5.32    int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
    5.33 @@ -2423,6 +2437,18 @@
    5.34    emit_byte(shift);
    5.35  }
    5.36  
    5.37 +void Assembler::psrldq(XMMRegister dst, int shift) {
    5.38 +  // Shift 128 bit value in xmm register by number of bytes.
    5.39 +  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    5.40 +
    5.41 +  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
    5.42 +  emit_byte(0x66);
    5.43 +  emit_byte(0x0F);
    5.44 +  emit_byte(0x73);
    5.45 +  emit_byte(0xC0 | encode);
    5.46 +  emit_byte(shift);
    5.47 +}
    5.48 +
    5.49  void Assembler::ptest(XMMRegister dst, Address src) {
    5.50    assert(VM_Version::supports_sse4_1(), "");
    5.51  
    5.52 @@ -8567,101 +8593,418 @@
    5.53  }
    5.54  #endif // _LP64
    5.55  
    5.56 -// IndexOf substring.
    5.57 -void MacroAssembler::string_indexof(Register str1, Register str2,
    5.58 -                                    Register cnt1, Register cnt2, Register result,
    5.59 -                                    XMMRegister vec, Register tmp) {
    5.60 +// IndexOf for constant substrings with size >= 8 chars
    5.61 +// which don't need to be loaded through stack.
    5.62 +void MacroAssembler::string_indexofC8(Register str1, Register str2,
    5.63 +                                      Register cnt1, Register cnt2,
    5.64 +                                      int int_cnt2,  Register result,
    5.65 +                                      XMMRegister vec, Register tmp) {
    5.66    assert(UseSSE42Intrinsics, "SSE4.2 is required");
    5.67  
    5.68 -  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
    5.69 -        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
    5.70 -
    5.71 -  push(str1); // string addr
    5.72 -  push(str2); // substr addr
    5.73 -  push(cnt2); // substr count
    5.74 -  jmpb(PREP_FOR_SCAN);
    5.75 -
    5.76 -  // Substr count saved at sp
    5.77 -  // Substr saved at sp+1*wordSize
    5.78 -  // String saved at sp+2*wordSize
    5.79 -
    5.80 -  // Reload substr for rescan
    5.81 -  bind(RELOAD_SUBSTR);
    5.82 -  movl(cnt2, Address(rsp, 0));
    5.83 -  movptr(str2, Address(rsp, wordSize));
    5.84 -  // We came here after the beginninig of the substring was
    5.85 -  // matched but the rest of it was not so we need to search
    5.86 -  // again. Start from the next element after the previous match.
    5.87 -  subptr(str1, result); // Restore counter
    5.88 -  shrl(str1, 1);
    5.89 -  addl(cnt1, str1);
    5.90 -  decrementl(cnt1);
    5.91 -  lea(str1, Address(result, 2)); // Reload string
    5.92 -
    5.93 -  // Load substr
    5.94 -  bind(PREP_FOR_SCAN);
    5.95 -  movdqu(vec, Address(str2, 0));
    5.96 -  addl(cnt1, 8);  // prime the loop
    5.97 -  subptr(str1, 16);
    5.98 -
    5.99 -  // Scan string for substr in 16-byte vectors
   5.100 -  bind(SCAN_TO_SUBSTR);
   5.101 -  subl(cnt1, 8);
   5.102 -  addptr(str1, 16);
   5.103 -
   5.104 -  // pcmpestri
   5.105 +  // This method uses pcmpestri inxtruction with bound registers
   5.106    //   inputs:
   5.107    //     xmm - substring
   5.108    //     rax - substring length (elements count)
   5.109 -  //     mem - scaned string
   5.110 +  //     mem - scanned string
   5.111    //     rdx - string length (elements count)
   5.112    //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
   5.113    //   outputs:
   5.114    //     rcx - matched index in string
   5.115    assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   5.116  
   5.117 -  pcmpestri(vec, Address(str1, 0), 0x0d);
   5.118 -  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
   5.119 -  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
   5.120 -
   5.121 -  // Fallthrough: found a potential substr
   5.122 +  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
   5.123 +        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
   5.124 +        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
   5.125 +
   5.126 +  // Note, inline_string_indexOf() generates checks:
   5.127 +  // if (substr.count > string.count) return -1;
   5.128 +  // if (substr.count == 0) return 0;
   5.129 +  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
   5.130 +
   5.131 +  // Load substring.
   5.132 +  movdqu(vec, Address(str2, 0));
   5.133 +  movl(cnt2, int_cnt2);
   5.134 +  movptr(result, str1); // string addr
   5.135 +
   5.136 +  if (int_cnt2 > 8) {
   5.137 +    jmpb(SCAN_TO_SUBSTR);
   5.138 +
   5.139 +    // Reload substr for rescan, this code
   5.140 +    // is executed only for large substrings (> 8 chars)
   5.141 +    bind(RELOAD_SUBSTR);
   5.142 +    movdqu(vec, Address(str2, 0));
   5.143 +    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
   5.144 +
   5.145 +    bind(RELOAD_STR);
   5.146 +    // We came here after the beginning of the substring was
   5.147 +    // matched but the rest of it was not so we need to search
   5.148 +    // again. Start from the next element after the previous match.
   5.149 +
   5.150 +    // cnt2 is number of substring reminding elements and
   5.151 +    // cnt1 is number of string reminding elements when cmp failed.
   5.152 +    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
   5.153 +    subl(cnt1, cnt2);
   5.154 +    addl(cnt1, int_cnt2);
   5.155 +    movl(cnt2, int_cnt2); // Now restore cnt2
   5.156 +
   5.157 +    decrementl(cnt1);     // Shift to next element
   5.158 +    cmpl(cnt1, cnt2);
   5.159 +    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
   5.160 +
   5.161 +    addptr(result, 2);
   5.162 +
   5.163 +  } // (int_cnt2 > 8)
   5.164 +
   5.165 +  // Scan string for start of substr in 16-byte vectors
   5.166 +  bind(SCAN_TO_SUBSTR);
   5.167 +  pcmpestri(vec, Address(result, 0), 0x0d);
   5.168 +  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
   5.169 +  subl(cnt1, 8);
   5.170 +  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   5.171 +  cmpl(cnt1, cnt2);
   5.172 +  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
   5.173 +  addptr(result, 16);
   5.174 +  jmpb(SCAN_TO_SUBSTR);
   5.175 +
   5.176 +  // Found a potential substr
   5.177 +  bind(FOUND_CANDIDATE);
   5.178 +  // Matched whole vector if first element matched (tmp(rcx) == 0).
   5.179 +  if (int_cnt2 == 8) {
   5.180 +    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
   5.181 +  } else { // int_cnt2 > 8
   5.182 +    jccb(Assembler::overflow, FOUND_SUBSTR);
   5.183 +  }
   5.184 +  // After pcmpestri tmp(rcx) contains matched element index
   5.185 +  // Compute start addr of substr
   5.186 +  lea(result, Address(result, tmp, Address::times_2));
   5.187  
   5.188    // Make sure string is still long enough
   5.189    subl(cnt1, tmp);
   5.190    cmpl(cnt1, cnt2);
   5.191 -  jccb(Assembler::negative, RET_NOT_FOUND);
   5.192 -  // Compute start addr of substr
   5.193 -  lea(str1, Address(str1, tmp, Address::times_2));
   5.194 -  movptr(result, str1); // save
   5.195 -
   5.196 -  // Compare potential substr
   5.197 -  addl(cnt1, 8);     // prime the loop
   5.198 -  addl(cnt2, 8);
   5.199 -  subptr(str1, 16);
   5.200 -  subptr(str2, 16);
   5.201 -
   5.202 -  // Scan 16-byte vectors of string and substr
   5.203 -  bind(SCAN_SUBSTR);
   5.204 -  subl(cnt1, 8);
   5.205 -  subl(cnt2, 8);
   5.206 -  addptr(str1, 16);
   5.207 -  addptr(str2, 16);
   5.208 -  movdqu(vec, Address(str2, 0));
   5.209 -  pcmpestri(vec, Address(str1, 0), 0x0d);
   5.210 -  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   5.211 -  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
   5.212 -
   5.213 -  // Compute substr offset
   5.214 -  subptr(result, Address(rsp, 2*wordSize));
   5.215 -  shrl(result, 1); // index
   5.216 -  jmpb(CLEANUP);
   5.217 +  if (int_cnt2 == 8) {
   5.218 +    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   5.219 +  } else { // int_cnt2 > 8
   5.220 +    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
   5.221 +  }
   5.222 +  // Left less then substring.
   5.223  
   5.224    bind(RET_NOT_FOUND);
   5.225    movl(result, -1);
   5.226 +  jmpb(EXIT);
   5.227 +
   5.228 +  if (int_cnt2 > 8) {
   5.229 +    // This code is optimized for the case when whole substring
   5.230 +    // is matched if its head is matched.
   5.231 +    bind(MATCH_SUBSTR_HEAD);
   5.232 +    pcmpestri(vec, Address(result, 0), 0x0d);
   5.233 +    // Reload only string if does not match
   5.234 +    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
   5.235 +
   5.236 +    Label CONT_SCAN_SUBSTR;
   5.237 +    // Compare the rest of substring (> 8 chars).
   5.238 +    bind(FOUND_SUBSTR);
   5.239 +    // First 8 chars are already matched.
   5.240 +    negptr(cnt2);
   5.241 +    addptr(cnt2, 8);
   5.242 +
   5.243 +    bind(SCAN_SUBSTR);
   5.244 +    subl(cnt1, 8);
   5.245 +    cmpl(cnt2, -8); // Do not read beyond substring
   5.246 +    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
   5.247 +    // Back-up strings to avoid reading beyond substring:
   5.248 +    // cnt1 = cnt1 - cnt2 + 8
   5.249 +    addl(cnt1, cnt2); // cnt2 is negative
   5.250 +    addl(cnt1, 8);
   5.251 +    movl(cnt2, 8); negptr(cnt2);
   5.252 +    bind(CONT_SCAN_SUBSTR);
   5.253 +    if (int_cnt2 < (int)G) {
   5.254 +      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
   5.255 +      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
   5.256 +    } else {
   5.257 +      // calculate index in register to avoid integer overflow (int_cnt2*2)
   5.258 +      movl(tmp, int_cnt2);
   5.259 +      addptr(tmp, cnt2);
   5.260 +      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
   5.261 +      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
   5.262 +    }
   5.263 +    // Need to reload strings pointers if not matched whole vector
   5.264 +    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   5.265 +    addptr(cnt2, 8);
   5.266 +    jccb(Assembler::negative, SCAN_SUBSTR);
   5.267 +    // Fall through if found full substring
   5.268 +
   5.269 +  } // (int_cnt2 > 8)
   5.270 +
   5.271 +  bind(RET_FOUND);
   5.272 +  // Found result if we matched full small substring.
   5.273 +  // Compute substr offset
   5.274 +  subptr(result, str1);
   5.275 +  shrl(result, 1); // index
   5.276 +  bind(EXIT);
   5.277 +
   5.278 +} // string_indexofC8
   5.279 +
   5.280 +// Small strings are loaded through stack if they cross page boundary.
   5.281 +void MacroAssembler::string_indexof(Register str1, Register str2,
   5.282 +                                    Register cnt1, Register cnt2,
   5.283 +                                    int int_cnt2,  Register result,
   5.284 +                                    XMMRegister vec, Register tmp) {
   5.285 +  assert(UseSSE42Intrinsics, "SSE4.2 is required");
   5.286 +  //
   5.287 +  // int_cnt2 is length of small (< 8 chars) constant substring
   5.288 +  // or (-1) for non constant substring in which case its length
   5.289 +  // is in cnt2 register.
   5.290 +  //
   5.291 +  // Note, inline_string_indexOf() generates checks:
   5.292 +  // if (substr.count > string.count) return -1;
   5.293 +  // if (substr.count == 0) return 0;
   5.294 +  //
   5.295 +  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
   5.296 +
   5.297 +  // This method uses pcmpestri inxtruction with bound registers
   5.298 +  //   inputs:
   5.299 +  //     xmm - substring
   5.300 +  //     rax - substring length (elements count)
   5.301 +  //     mem - scanned string
   5.302 +  //     rdx - string length (elements count)
   5.303 +  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
   5.304 +  //   outputs:
   5.305 +  //     rcx - matched index in string
   5.306 +  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   5.307 +
   5.308 +  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
   5.309 +        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
   5.310 +        FOUND_CANDIDATE;
   5.311 +
   5.312 +  { //========================================================
   5.313 +    // We don't know where these strings are located
   5.314 +    // and we can't read beyond them. Load them through stack.
   5.315 +    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
   5.316 +
   5.317 +    movptr(tmp, rsp); // save old SP
   5.318 +
   5.319 +    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
   5.320 +      if (int_cnt2 == 1) {  // One char
   5.321 +        load_unsigned_short(result, Address(str2, 0));
   5.322 +        movdl(vec, result); // move 32 bits
   5.323 +      } else if (int_cnt2 == 2) { // Two chars
   5.324 +        movdl(vec, Address(str2, 0)); // move 32 bits
   5.325 +      } else if (int_cnt2 == 4) { // Four chars
   5.326 +        movq(vec, Address(str2, 0));  // move 64 bits
   5.327 +      } else { // cnt2 = { 3, 5, 6, 7 }
   5.328 +        // Array header size is 12 bytes in 32-bit VM
   5.329 +        // + 6 bytes for 3 chars == 18 bytes,
   5.330 +        // enough space to load vec and shift.
   5.331 +        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
   5.332 +        movdqu(vec, Address(str2, (int_cnt2*2)-16));
   5.333 +        psrldq(vec, 16-(int_cnt2*2));
   5.334 +      }
   5.335 +    } else { // not constant substring
   5.336 +      cmpl(cnt2, 8);
   5.337 +      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
   5.338 +
   5.339 +      // We can read beyond string if str+16 does not cross page boundary
   5.340 +      // since heaps are aligned and mapped by pages.
   5.341 +      assert(os::vm_page_size() < (int)G, "default page should be small");
   5.342 +      movl(result, str2); // We need only low 32 bits
   5.343 +      andl(result, (os::vm_page_size()-1));
   5.344 +      cmpl(result, (os::vm_page_size()-16));
   5.345 +      jccb(Assembler::belowEqual, CHECK_STR);
   5.346 +
   5.347 +      // Move small strings to stack to allow load 16 bytes into vec.
   5.348 +      subptr(rsp, 16);
   5.349 +      int stk_offset = wordSize-2;
   5.350 +      push(cnt2);
   5.351 +
   5.352 +      bind(COPY_SUBSTR);
   5.353 +      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
   5.354 +      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
   5.355 +      decrement(cnt2);
   5.356 +      jccb(Assembler::notZero, COPY_SUBSTR);
   5.357 +
   5.358 +      pop(cnt2);
   5.359 +      movptr(str2, rsp);  // New substring address
   5.360 +    } // non constant
   5.361 +
   5.362 +    bind(CHECK_STR);
   5.363 +    cmpl(cnt1, 8);
   5.364 +    jccb(Assembler::aboveEqual, BIG_STRINGS);
   5.365 +
   5.366 +    // Check cross page boundary.
   5.367 +    movl(result, str1); // We need only low 32 bits
   5.368 +    andl(result, (os::vm_page_size()-1));
   5.369 +    cmpl(result, (os::vm_page_size()-16));
   5.370 +    jccb(Assembler::belowEqual, BIG_STRINGS);
   5.371 +
   5.372 +    subptr(rsp, 16);
   5.373 +    int stk_offset = -2;
   5.374 +    if (int_cnt2 < 0) { // not constant
   5.375 +      push(cnt2);
   5.376 +      stk_offset += wordSize;
   5.377 +    }
   5.378 +    movl(cnt2, cnt1);
   5.379 +
   5.380 +    bind(COPY_STR);
   5.381 +    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
   5.382 +    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
   5.383 +    decrement(cnt2);
   5.384 +    jccb(Assembler::notZero, COPY_STR);
   5.385 +
   5.386 +    if (int_cnt2 < 0) { // not constant
   5.387 +      pop(cnt2);
   5.388 +    }
   5.389 +    movptr(str1, rsp);  // New string address
   5.390 +
   5.391 +    bind(BIG_STRINGS);
   5.392 +    // Load substring.
   5.393 +    if (int_cnt2 < 0) { // -1
   5.394 +      movdqu(vec, Address(str2, 0));
   5.395 +      push(cnt2);       // substr count
   5.396 +      push(str2);       // substr addr
   5.397 +      push(str1);       // string addr
   5.398 +    } else {
   5.399 +      // Small (< 8 chars) constant substrings are loaded already.
   5.400 +      movl(cnt2, int_cnt2);
   5.401 +    }
   5.402 +    push(tmp);  // original SP
   5.403 +
   5.404 +  } // Finished loading
   5.405 +
   5.406 +  //========================================================
   5.407 +  // Start search
   5.408 +  //
   5.409 +
   5.410 +  movptr(result, str1); // string addr
   5.411 +
   5.412 +  if (int_cnt2  < 0) {  // Only for non constant substring
   5.413 +    jmpb(SCAN_TO_SUBSTR);
   5.414 +
   5.415 +    // SP saved at sp+0
   5.416 +    // String saved at sp+1*wordSize
   5.417 +    // Substr saved at sp+2*wordSize
   5.418 +    // Substr count saved at sp+3*wordSize
   5.419 +
   5.420 +    // Reload substr for rescan, this code
   5.421 +    // is executed only for large substrings (> 8 chars)
   5.422 +    bind(RELOAD_SUBSTR);
   5.423 +    movptr(str2, Address(rsp, 2*wordSize));
   5.424 +    movl(cnt2, Address(rsp, 3*wordSize));
   5.425 +    movdqu(vec, Address(str2, 0));
   5.426 +    // We came here after the beginning of the substring was
   5.427 +    // matched but the rest of it was not so we need to search
   5.428 +    // again. Start from the next element after the previous match.
   5.429 +    subptr(str1, result); // Restore counter
   5.430 +    shrl(str1, 1);
   5.431 +    addl(cnt1, str1);
   5.432 +    decrementl(cnt1);   // Shift to next element
   5.433 +    cmpl(cnt1, cnt2);
   5.434 +    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   5.435 +
   5.436 +    addptr(result, 2);
   5.437 +  } // non constant
   5.438 +
   5.439 +  // Scan string for start of substr in 16-byte vectors
   5.440 +  bind(SCAN_TO_SUBSTR);
   5.441 +  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   5.442 +  pcmpestri(vec, Address(result, 0), 0x0d);
   5.443 +  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
   5.444 +  subl(cnt1, 8);
   5.445 +  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   5.446 +  cmpl(cnt1, cnt2);
   5.447 +  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   5.448 +  addptr(result, 16);
   5.449 +
   5.450 +  bind(ADJUST_STR);
   5.451 +  cmpl(cnt1, 8); // Do not read beyond string
   5.452 +  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   5.453 +  // Back-up string to avoid reading beyond string.
   5.454 +  lea(result, Address(result, cnt1, Address::times_2, -16));
   5.455 +  movl(cnt1, 8);
   5.456 +  jmpb(SCAN_TO_SUBSTR);
   5.457 +
   5.458 +  // Found a potential substr
   5.459 +  bind(FOUND_CANDIDATE);
   5.460 +  // After pcmpestri tmp(rcx) contains matched element index
   5.461 +
   5.462 +  // Make sure string is still long enough
   5.463 +  subl(cnt1, tmp);
   5.464 +  cmpl(cnt1, cnt2);
   5.465 +  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
   5.466 +  // Left less than substring.
   5.467 +
   5.468 +  bind(RET_NOT_FOUND);
   5.469 +  movl(result, -1);
   5.470 +  jmpb(CLEANUP);
   5.471 +
   5.472 +  bind(FOUND_SUBSTR);
   5.473 +  // Compute start addr of substr
   5.474 +  lea(result, Address(result, tmp, Address::times_2));
   5.475 +
   5.476 +  if (int_cnt2 > 0) { // Constant substring
   5.477 +    // Repeat search for small substring (< 8 chars)
   5.478 +    // from new point without reloading substring.
   5.479 +    // Have to check that we don't read beyond string.
   5.480 +    cmpl(tmp, 8-int_cnt2);
   5.481 +    jccb(Assembler::greater, ADJUST_STR);
   5.482 +    // Fall through if matched whole substring.
   5.483 +  } else { // non constant
   5.484 +    assert(int_cnt2 == -1, "should be != 0");
   5.485 +
   5.486 +    addl(tmp, cnt2);
   5.487 +    // Found result if we matched whole substring.
   5.488 +    cmpl(tmp, 8);
   5.489 +    jccb(Assembler::lessEqual, RET_FOUND);
   5.490 +
   5.491 +    // Repeat search for small substring (<= 8 chars)
   5.492 +    // from new point 'str1' without reloading substring.
   5.493 +    cmpl(cnt2, 8);
   5.494 +    // Have to check that we don't read beyond string.
   5.495 +    jccb(Assembler::lessEqual, ADJUST_STR);
   5.496 +
   5.497 +    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
   5.498 +    // Compare the rest of substring (> 8 chars).
   5.499 +    movptr(str1, result);
   5.500 +
   5.501 +    cmpl(tmp, cnt2);
   5.502 +    // First 8 chars are already matched.
   5.503 +    jccb(Assembler::equal, CHECK_NEXT);
   5.504 +
   5.505 +    bind(SCAN_SUBSTR);
   5.506 +    pcmpestri(vec, Address(str1, 0), 0x0d);
   5.507 +    // Need to reload strings pointers if not matched whole vector
   5.508 +    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   5.509 +
   5.510 +    bind(CHECK_NEXT);
   5.511 +    subl(cnt2, 8);
   5.512 +    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
   5.513 +    addptr(str1, 16);
   5.514 +    addptr(str2, 16);
   5.515 +    subl(cnt1, 8);
   5.516 +    cmpl(cnt2, 8); // Do not read beyond substring
   5.517 +    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
   5.518 +    // Back-up strings to avoid reading beyond substring.
   5.519 +    lea(str2, Address(str2, cnt2, Address::times_2, -16));
   5.520 +    lea(str1, Address(str1, cnt2, Address::times_2, -16));
   5.521 +    subl(cnt1, cnt2);
   5.522 +    movl(cnt2, 8);
   5.523 +    addl(cnt1, 8);
   5.524 +    bind(CONT_SCAN_SUBSTR);
   5.525 +    movdqu(vec, Address(str2, 0));
   5.526 +    jmpb(SCAN_SUBSTR);
   5.527 +
   5.528 +    bind(RET_FOUND_LONG);
   5.529 +    movptr(str1, Address(rsp, wordSize));
   5.530 +  } // non constant
   5.531 +
   5.532 +  bind(RET_FOUND);
   5.533 +  // Compute substr offset
   5.534 +  subptr(result, str1);
   5.535 +  shrl(result, 1); // index
   5.536  
   5.537    bind(CLEANUP);
   5.538 -  addptr(rsp, 3*wordSize);
   5.539 -}
   5.540 +  pop(rsp); // restore SP
   5.541 +
   5.542 +} // string_indexof
   5.543  
   5.544  // Compare strings.
   5.545  void MacroAssembler::string_compare(Register str1, Register str2,
     6.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 03 21:02:56 2011 -0800
     6.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 03 23:31:45 2011 -0800
     6.3 @@ -1121,6 +1121,7 @@
     6.4  
     6.5    void movdl(XMMRegister dst, Register src);
     6.6    void movdl(Register dst, XMMRegister src);
     6.7 +  void movdl(XMMRegister dst, Address src);
     6.8  
     6.9    // Move Double Quadword
    6.10    void movdq(XMMRegister dst, Register src);
    6.11 @@ -1288,9 +1289,12 @@
    6.12    void pshuflw(XMMRegister dst, XMMRegister src, int mode);
    6.13    void pshuflw(XMMRegister dst, Address src,     int mode);
    6.14  
    6.15 -  // Shift Right Logical Quadword Immediate
    6.16 +  // Shift Right by bits Logical Quadword Immediate
    6.17    void psrlq(XMMRegister dst, int shift);
    6.18  
    6.19 +  // Shift Right by bytes Logical DoubleQuadword Immediate
    6.20 +  void psrldq(XMMRegister dst, int shift);
    6.21 +
    6.22    // Logical Compare Double Quadword
    6.23    void ptest(XMMRegister dst, XMMRegister src);
    6.24    void ptest(XMMRegister dst, Address src);
    6.25 @@ -2290,10 +2294,22 @@
    6.26    void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
    6.27  
    6.28    // IndexOf strings.
    6.29 +  // Small strings are loaded through stack if they cross page boundary.
    6.30    void string_indexof(Register str1, Register str2,
    6.31 -                      Register cnt1, Register cnt2, Register result,
    6.32 +                      Register cnt1, Register cnt2,
    6.33 +                      int int_cnt2,  Register result,
    6.34                        XMMRegister vec, Register tmp);
    6.35  
    6.36 +  // IndexOf for constant substrings with size >= 8 elements
    6.37 +  // which don't need to be loaded through stack.
    6.38 +  void string_indexofC8(Register str1, Register str2,
    6.39 +                      Register cnt1, Register cnt2,
    6.40 +                      int int_cnt2,  Register result,
    6.41 +                      XMMRegister vec, Register tmp);
    6.42 +
    6.43 +    // Smallest code: we don't need to load through stack,
    6.44 +    // check string tail.
    6.45 +
    6.46    // Compare strings.
    6.47    void string_compare(Register str1, Register str2,
    6.48                        Register cnt1, Register cnt2, Register result,
     7.1 --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
     7.2 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
     7.3 @@ -456,10 +456,8 @@
     7.4    __ verify_not_null_oop(rax);
     7.5  
     7.6    // search an exception handler (rax: exception oop, rdx: throwing pc)
     7.7 -  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id)));
     7.8 -
     7.9 -  __ stop("should not reach here");
    7.10 -
    7.11 +  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
    7.12 +  __ should_not_reach_here();
    7.13    assert(code_offset() - offset <= exception_handler_size, "overflow");
    7.14    __ end_a_stub();
    7.15  
     8.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
     8.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
     8.3 @@ -248,11 +248,14 @@
     8.4  #ifdef _LP64
     8.5    align_dummy_0, align_dummy_1,
     8.6  #endif // _LP64
     8.7 -  dummy1, SLOT2(dummy1H)                                                                    // 0, 4
     8.8 -  dummy2, SLOT2(dummy2H)                                                                    // 8, 12
     8.9 -  // Two temps to be used as needed by users of save/restore callee registers
    8.10 -  temp_2_off, SLOT2(temp_2H_off)                                                            // 16, 20
    8.11 -  temp_1_off, SLOT2(temp_1H_off)                                                            // 24, 28
    8.12 +#ifdef _WIN64
    8.13 +  // Windows always allocates space for its argument registers (see
    8.14 +  // frame::arg_reg_save_area_bytes).
    8.15 +  arg_reg_save_1, arg_reg_save_1H,                                                          // 0, 4
    8.16 +  arg_reg_save_2, arg_reg_save_2H,                                                          // 8, 12
    8.17 +  arg_reg_save_3, arg_reg_save_3H,                                                          // 16, 20
    8.18 +  arg_reg_save_4, arg_reg_save_4H,                                                          // 24, 28
    8.19 +#endif // _WIN64
    8.20    xmm_regs_as_doubles_off,                                                                  // 32
    8.21    float_regs_as_doubles_off = xmm_regs_as_doubles_off + xmm_regs_as_doubles_size_in_slots,  // 160
    8.22    fpu_state_off = float_regs_as_doubles_off + float_regs_as_doubles_size_in_slots,          // 224
    8.23 @@ -282,24 +285,7 @@
    8.24    rax_off, SLOT2(raxH_off)                                                                  // 480, 484
    8.25    saved_rbp_off, SLOT2(saved_rbpH_off)                                                      // 488, 492
    8.26    return_off, SLOT2(returnH_off)                                                            // 496, 500
    8.27 -  reg_save_frame_size,  // As noted: neglects any parameters to runtime                     // 504
    8.28 -
    8.29 -#ifdef _WIN64
    8.30 -  c_rarg0_off = rcx_off,
    8.31 -#else
    8.32 -  c_rarg0_off = rdi_off,
    8.33 -#endif // WIN64
    8.34 -
    8.35 -  // equates
    8.36 -
    8.37 -  // illegal instruction handler
    8.38 -  continue_dest_off = temp_1_off,
    8.39 -
    8.40 -  // deoptimization equates
    8.41 -  fp0_off = float_regs_as_doubles_off, // slot for java float/double return value
    8.42 -  xmm0_off = xmm_regs_as_doubles_off,  // slot for java float/double return value
    8.43 -  deopt_type = temp_2_off,             // slot for type of deopt in progress
    8.44 -  ret_type = temp_1_off                // slot for return type
    8.45 +  reg_save_frame_size   // As noted: neglects any parameters to runtime                     // 504
    8.46  };
    8.47  
    8.48  
    8.49 @@ -405,11 +391,6 @@
    8.50                                     bool save_fpu_registers = true) {
    8.51    __ block_comment("save_live_registers");
    8.52  
    8.53 -  // 64bit passes the args in regs to the c++ runtime
    8.54 -  int frame_size_in_slots = reg_save_frame_size NOT_LP64(+ num_rt_args); // args + thread
    8.55 -  // frame_size = round_to(frame_size, 4);
    8.56 -  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word );
    8.57 -
    8.58    __ pusha();         // integer registers
    8.59  
    8.60    // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
    8.61 @@ -642,19 +623,58 @@
    8.62  }
    8.63  
    8.64  
    8.65 -void Runtime1::generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool save_fpu_registers) {
    8.66 +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
    8.67 +  __ block_comment("generate_handle_exception");
    8.68 +
    8.69    // incoming parameters
    8.70    const Register exception_oop = rax;
    8.71 -  const Register exception_pc = rdx;
    8.72 +  const Register exception_pc  = rdx;
    8.73    // other registers used in this stub
    8.74 -  const Register real_return_addr = rbx;
    8.75    const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
    8.76  
    8.77 -  __ block_comment("generate_handle_exception");
    8.78 +  // Save registers, if required.
    8.79 +  OopMapSet* oop_maps = new OopMapSet();
    8.80 +  OopMap* oop_map = NULL;
    8.81 +  switch (id) {
    8.82 +  case forward_exception_id:
    8.83 +    // We're handling an exception in the context of a compiled frame.
    8.84 +    // The registers have been saved in the standard places.  Perform
    8.85 +    // an exception lookup in the caller and dispatch to the handler
    8.86 +    // if found.  Otherwise unwind and dispatch to the callers
    8.87 +    // exception handler.
    8.88 +    oop_map = generate_oop_map(sasm, 1 /*thread*/);
    8.89 +
    8.90 +    // load and clear pending exception oop into RAX
    8.91 +    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
    8.92 +    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
    8.93 +
    8.94 +    // load issuing PC (the return address for this stub) into rdx
    8.95 +    __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
    8.96 +
    8.97 +    // make sure that the vm_results are cleared (may be unnecessary)
    8.98 +    __ movptr(Address(thread, JavaThread::vm_result_offset()),   NULL_WORD);
    8.99 +    __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
   8.100 +    break;
   8.101 +  case handle_exception_nofpu_id:
   8.102 +  case handle_exception_id:
   8.103 +    // At this point all registers MAY be live.
   8.104 +    oop_map = save_live_registers(sasm, 1 /*thread*/, id == handle_exception_nofpu_id);
   8.105 +    break;
   8.106 +  case handle_exception_from_callee_id: {
   8.107 +    // At this point all registers except exception oop (RAX) and
   8.108 +    // exception pc (RDX) are dead.
   8.109 +    const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord);
   8.110 +    oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
   8.111 +    sasm->set_frame_size(frame_size);
   8.112 +    WIN64_ONLY(__ subq(rsp, frame::arg_reg_save_area_bytes));
   8.113 +    break;
   8.114 +  }
   8.115 +  default:  ShouldNotReachHere();
   8.116 +  }
   8.117  
   8.118  #ifdef TIERED
   8.119    // C2 can leave the fpu stack dirty
   8.120 -  if (UseSSE < 2 ) {
   8.121 +  if (UseSSE < 2) {
   8.122      __ empty_FPU_stack();
   8.123    }
   8.124  #endif // TIERED
   8.125 @@ -686,11 +706,7 @@
   8.126    // save exception oop and issuing pc into JavaThread
   8.127    // (exception handler will load it from here)
   8.128    __ movptr(Address(thread, JavaThread::exception_oop_offset()), exception_oop);
   8.129 -  __ movptr(Address(thread, JavaThread::exception_pc_offset()), exception_pc);
   8.130 -
   8.131 -  // save real return address (pc that called this stub)
   8.132 -  __ movptr(real_return_addr, Address(rbp, 1*BytesPerWord));
   8.133 -  __ movptr(Address(rsp, temp_1_off * VMRegImpl::stack_slot_size), real_return_addr);
   8.134 +  __ movptr(Address(thread, JavaThread::exception_pc_offset()),  exception_pc);
   8.135  
   8.136    // patch throwing pc into return address (has bci & oop map)
   8.137    __ movptr(Address(rbp, 1*BytesPerWord), exception_pc);
   8.138 @@ -700,33 +716,41 @@
   8.139    int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
   8.140    oop_maps->add_gc_map(call_offset, oop_map);
   8.141  
   8.142 -  // rax,: handler address
   8.143 +  // rax: handler address
   8.144    //      will be the deopt blob if nmethod was deoptimized while we looked up
   8.145    //      handler regardless of whether handler existed in the nmethod.
   8.146  
   8.147    // only rax, is valid at this time, all other registers have been destroyed by the runtime call
   8.148    __ invalidate_registers(false, true, true, true, true, true);
   8.149  
   8.150 -#ifdef ASSERT
   8.151 -  // Do we have an exception handler in the nmethod?
   8.152 -  Label done;
   8.153 -  __ testptr(rax, rax);
   8.154 -  __ jcc(Assembler::notZero, done);
   8.155 -  __ stop("no handler found");
   8.156 -  __ bind(done);
   8.157 -#endif
   8.158 -
   8.159 -  // exception handler found
   8.160 -  // patch the return address -> the stub will directly return to the exception handler
   8.161 +  // patch the return address, this stub will directly return to the exception handler
   8.162    __ movptr(Address(rbp, 1*BytesPerWord), rax);
   8.163  
   8.164 -  // restore registers
   8.165 -  restore_live_registers(sasm, save_fpu_registers);
   8.166 +  switch (id) {
   8.167 +  case forward_exception_id:
   8.168 +  case handle_exception_nofpu_id:
   8.169 +  case handle_exception_id:
   8.170 +    // Restore the registers that were saved at the beginning.
   8.171 +    restore_live_registers(sasm, id == handle_exception_nofpu_id);
   8.172 +    break;
   8.173 +  case handle_exception_from_callee_id:
   8.174 +    // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
   8.175 +    // since we do a leave anyway.
   8.176  
   8.177 -  // return to exception handler
   8.178 -  __ leave();
   8.179 -  __ ret(0);
   8.180 +    // Pop the return address since we are possibly changing SP (restoring from BP).
   8.181 +    __ leave();
   8.182 +    __ pop(rcx);
   8.183  
   8.184 +    // Restore SP from BP if the exception PC is a method handle call site.
   8.185 +    NOT_LP64(__ get_thread(thread);)
   8.186 +    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
   8.187 +    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
   8.188 +    __ jmp(rcx);  // jump to exception handler
   8.189 +    break;
   8.190 +  default:  ShouldNotReachHere();
   8.191 +  }
   8.192 +
   8.193 +  return oop_maps;
   8.194  }
   8.195  
   8.196  
   8.197 @@ -791,7 +815,7 @@
   8.198    // the pop is also necessary to simulate the effect of a ret(0)
   8.199    __ pop(exception_pc);
   8.200  
   8.201 -  // Restore SP from BP if the exception PC is a MethodHandle call site.
   8.202 +  // Restore SP from BP if the exception PC is a method handle call site.
   8.203    NOT_LP64(__ get_thread(thread);)
   8.204    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
   8.205    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
   8.206 @@ -934,7 +958,6 @@
   8.207    __ ret(0);
   8.208  
   8.209    return oop_maps;
   8.210 -
   8.211  }
   8.212  
   8.213  
   8.214 @@ -952,35 +975,9 @@
   8.215    switch (id) {
   8.216      case forward_exception_id:
   8.217        {
   8.218 -        // we're handling an exception in the context of a compiled
   8.219 -        // frame.  The registers have been saved in the standard
   8.220 -        // places.  Perform an exception lookup in the caller and
   8.221 -        // dispatch to the handler if found.  Otherwise unwind and
   8.222 -        // dispatch to the callers exception handler.
   8.223 -
   8.224 -        const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread);
   8.225 -        const Register exception_oop = rax;
   8.226 -        const Register exception_pc = rdx;
   8.227 -
   8.228 -        // load pending exception oop into rax,
   8.229 -        __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
   8.230 -        // clear pending exception
   8.231 -        __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
   8.232 -
   8.233 -        // load issuing PC (the return address for this stub) into rdx
   8.234 -        __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
   8.235 -
   8.236 -        // make sure that the vm_results are cleared (may be unnecessary)
   8.237 -        __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
   8.238 -        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
   8.239 -
   8.240 -        // verify that that there is really a valid exception in rax,
   8.241 -        __ verify_not_null_oop(exception_oop);
   8.242 -
   8.243 -        oop_maps = new OopMapSet();
   8.244 -        OopMap* oop_map = generate_oop_map(sasm, 1);
   8.245 -        generate_handle_exception(sasm, oop_maps, oop_map);
   8.246 -        __ stop("should not reach here");
   8.247 +        oop_maps = generate_handle_exception(id, sasm);
   8.248 +        __ leave();
   8.249 +        __ ret(0);
   8.250        }
   8.251        break;
   8.252  
   8.253 @@ -1315,13 +1312,15 @@
   8.254        break;
   8.255  
   8.256      case handle_exception_nofpu_id:
   8.257 -      save_fpu_registers = false;
   8.258 -      // fall through
   8.259      case handle_exception_id:
   8.260        { StubFrame f(sasm, "handle_exception", dont_gc_arguments);
   8.261 -        oop_maps = new OopMapSet();
   8.262 -        OopMap* oop_map = save_live_registers(sasm, 1, save_fpu_registers);
   8.263 -        generate_handle_exception(sasm, oop_maps, oop_map, save_fpu_registers);
   8.264 +        oop_maps = generate_handle_exception(id, sasm);
   8.265 +      }
   8.266 +      break;
   8.267 +
   8.268 +    case handle_exception_from_callee_id:
   8.269 +      { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
   8.270 +        oop_maps = generate_handle_exception(id, sasm);
   8.271        }
   8.272        break;
   8.273  
     9.1 --- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Mar 03 21:02:56 2011 -0800
     9.2 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Mar 03 23:31:45 2011 -0800
     9.3 @@ -419,6 +419,7 @@
     9.4  
     9.5    // some handy addresses
     9.6    Address rbx_method_fie(     rbx,      methodOopDesc::from_interpreted_offset() );
     9.7 +  Address rbx_method_fce(     rbx,      methodOopDesc::from_compiled_offset() );
     9.8  
     9.9    Address rcx_mh_vmtarget(    rcx_recv, java_dyn_MethodHandle::vmtarget_offset_in_bytes() );
    9.10    Address rcx_dmh_vmindex(    rcx_recv, sun_dyn_DirectMethodHandle::vmindex_offset_in_bytes() );
    9.11 @@ -448,12 +449,10 @@
    9.12    case _raise_exception:
    9.13      {
    9.14        // Not a real MH entry, but rather shared code for raising an
    9.15 -      // exception.  Since we use a C2I adapter to set up the
    9.16 -      // interpreter state, arguments are expected in compiler
    9.17 -      // argument registers.
    9.18 +      // exception.  Since we use the compiled entry, arguments are
    9.19 +      // expected in compiler argument registers.
    9.20        assert(raise_exception_method(), "must be set");
    9.21 -      address c2i_entry = raise_exception_method()->get_c2i_entry();
    9.22 -      assert(c2i_entry, "method must be linked");
    9.23 +      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
    9.24  
    9.25        const Register rdi_pc = rax;
    9.26        __ pop(rdi_pc);  // caller PC
    9.27 @@ -472,13 +471,10 @@
    9.28        __ jccb(Assembler::zero, L_no_method);
    9.29        __ verify_oop(rbx_method);
    9.30  
    9.31 -      // 32-bit: push remaining arguments as if coming from the compiler.
    9.32        NOT_LP64(__ push(rarg2_required));
    9.33 +      __ push(rdi_pc);         // restore caller PC
    9.34 +      __ jmp(rbx_method_fce);  // jump to compiled entry
    9.35  
    9.36 -      __ push(rdi_pc);  // restore caller PC
    9.37 -      __ jump(ExternalAddress(c2i_entry));  // do C2I transition
    9.38 -
    9.39 -      // If we get here, the Java runtime did not do its job of creating the exception.
    9.40        // Do something that is at least causes a valid throw from the interpreter.
    9.41        __ bind(L_no_method);
    9.42        __ push(rarg2_required);
    10.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 03 21:02:56 2011 -0800
    10.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 03 23:31:45 2011 -0800
    10.3 @@ -439,10 +439,6 @@
    10.4      // Verify that there is really a valid exception in RAX.
    10.5      __ verify_oop(exception_oop);
    10.6  
    10.7 -    // Restore SP from BP if the exception PC is a MethodHandle call site.
    10.8 -    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
    10.9 -    __ cmovptr(Assembler::notEqual, rsp, rbp);
   10.10 -
   10.11      // continue at exception handler (return address removed)
   10.12      // rax: exception
   10.13      // rbx: exception handler
   10.14 @@ -733,18 +729,19 @@
   10.15    //  Input:
   10.16    //     start   -  starting address
   10.17    //     count   -  element count
   10.18 -  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
   10.19 +  void  gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
   10.20      assert_different_registers(start, count);
   10.21      BarrierSet* bs = Universe::heap()->barrier_set();
   10.22      switch (bs->kind()) {
   10.23        case BarrierSet::G1SATBCT:
   10.24        case BarrierSet::G1SATBCTLogging:
   10.25 -        {
   10.26 -          __ pusha();                      // push registers
   10.27 -          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
   10.28 -                          start, count);
   10.29 -          __ popa();
   10.30 -        }
   10.31 +        // With G1, don't generate the call if we statically know that the target is uninitialized
   10.32 +        if (!uninitialized_target) {
   10.33 +           __ pusha();                      // push registers
   10.34 +           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
   10.35 +                           start, count);
   10.36 +           __ popa();
   10.37 +         }
   10.38          break;
   10.39        case BarrierSet::CardTableModRef:
   10.40        case BarrierSet::CardTableExtension:
   10.41 @@ -923,7 +920,8 @@
   10.42  
   10.43    address generate_disjoint_copy(BasicType t, bool aligned,
   10.44                                   Address::ScaleFactor sf,
   10.45 -                                 address* entry, const char *name) {
   10.46 +                                 address* entry, const char *name,
   10.47 +                                 bool dest_uninitialized = false) {
   10.48      __ align(CodeEntryAlignment);
   10.49      StubCodeMark mark(this, "StubRoutines", name);
   10.50      address start = __ pc();
   10.51 @@ -945,16 +943,19 @@
   10.52      __ movptr(from , Address(rsp, 12+ 4));
   10.53      __ movptr(to   , Address(rsp, 12+ 8));
   10.54      __ movl(count, Address(rsp, 12+ 12));
   10.55 +
   10.56 +    if (entry != NULL) {
   10.57 +      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
   10.58 +      BLOCK_COMMENT("Entry:");
   10.59 +    }
   10.60 +
   10.61      if (t == T_OBJECT) {
   10.62        __ testl(count, count);
   10.63        __ jcc(Assembler::zero, L_0_count);
   10.64 -      gen_write_ref_array_pre_barrier(to, count);
   10.65 +      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
   10.66        __ mov(saved_to, to);          // save 'to'
   10.67      }
   10.68  
   10.69 -    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
   10.70 -    BLOCK_COMMENT("Entry:");
   10.71 -
   10.72      __ subptr(to, from); // to --> to_from
   10.73      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
   10.74      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
   10.75 @@ -1085,7 +1086,8 @@
   10.76    address generate_conjoint_copy(BasicType t, bool aligned,
   10.77                                   Address::ScaleFactor sf,
   10.78                                   address nooverlap_target,
   10.79 -                                 address* entry, const char *name) {
   10.80 +                                 address* entry, const char *name,
   10.81 +                                 bool dest_uninitialized = false) {
   10.82      __ align(CodeEntryAlignment);
   10.83      StubCodeMark mark(this, "StubRoutines", name);
   10.84      address start = __ pc();
   10.85 @@ -1108,23 +1110,17 @@
   10.86      __ movptr(src  , Address(rsp, 12+ 4));   // from
   10.87      __ movptr(dst  , Address(rsp, 12+ 8));   // to
   10.88      __ movl2ptr(count, Address(rsp, 12+12)); // count
   10.89 -    if (t == T_OBJECT) {
   10.90 -       gen_write_ref_array_pre_barrier(dst, count);
   10.91 -    }
   10.92  
   10.93      if (entry != NULL) {
   10.94        *entry = __ pc(); // Entry point from generic arraycopy stub.
   10.95        BLOCK_COMMENT("Entry:");
   10.96      }
   10.97  
   10.98 -    if (t == T_OBJECT) {
   10.99 -      __ testl(count, count);
  10.100 -      __ jcc(Assembler::zero, L_0_count);
  10.101 -    }
  10.102 +    // nooverlap_target expects arguments in rsi and rdi.
  10.103      __ mov(from, src);
  10.104      __ mov(to  , dst);
  10.105  
  10.106 -    // arrays overlap test
  10.107 +    // arrays overlap test: dispatch to disjoint stub if necessary.
  10.108      RuntimeAddress nooverlap(nooverlap_target);
  10.109      __ cmpptr(dst, src);
  10.110      __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
  10.111 @@ -1132,6 +1128,12 @@
  10.112      __ cmpptr(dst, end);
  10.113      __ jump_cc(Assembler::aboveEqual, nooverlap);
  10.114  
  10.115 +    if (t == T_OBJECT) {
  10.116 +      __ testl(count, count);
  10.117 +      __ jcc(Assembler::zero, L_0_count);
  10.118 +      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
  10.119 +    }
  10.120 +
  10.121      // copy from high to low
  10.122      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
  10.123      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
  10.124 @@ -1416,7 +1418,7 @@
  10.125    //    rax, ==  0  -  success
  10.126    //    rax, == -1^K - failure, where K is partial transfer count
  10.127    //
  10.128 -  address generate_checkcast_copy(const char *name, address* entry) {
  10.129 +  address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
  10.130      __ align(CodeEntryAlignment);
  10.131      StubCodeMark mark(this, "StubRoutines", name);
  10.132      address start = __ pc();
  10.133 @@ -1451,8 +1453,10 @@
  10.134      __ movptr(to,         to_arg);
  10.135      __ movl2ptr(length, length_arg);
  10.136  
  10.137 -    *entry = __ pc(); // Entry point from generic arraycopy stub.
  10.138 -    BLOCK_COMMENT("Entry:");
  10.139 +    if (entry != NULL) {
  10.140 +      *entry = __ pc(); // Entry point from generic arraycopy stub.
  10.141 +      BLOCK_COMMENT("Entry:");
  10.142 +    }
  10.143  
  10.144      //---------------------------------------------------------------
  10.145      // Assembler stub will be used for this call to arraycopy
  10.146 @@ -1475,7 +1479,7 @@
  10.147      Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
  10.148  
  10.149      // Copy from low to high addresses, indexed from the end of each array.
  10.150 -    gen_write_ref_array_pre_barrier(to, count);
  10.151 +    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  10.152      __ lea(end_from, end_from_addr);
  10.153      __ lea(end_to,   end_to_addr);
  10.154      assert(length == count, "");        // else fix next line:
  10.155 @@ -2038,6 +2042,15 @@
  10.156          generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
  10.157                                 &entry_oop_arraycopy, "oop_arraycopy");
  10.158  
  10.159 +    StubRoutines::_oop_disjoint_arraycopy_uninit =
  10.160 +        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
  10.161 +                               "oop_disjoint_arraycopy_uninit",
  10.162 +                               /*dest_uninitialized*/true);
  10.163 +    StubRoutines::_oop_arraycopy_uninit =
  10.164 +        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
  10.165 +                               NULL, "oop_arraycopy_uninit",
  10.166 +                               /*dest_uninitialized*/true);
  10.167 +
  10.168      StubRoutines::_jlong_disjoint_arraycopy =
  10.169          generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
  10.170      StubRoutines::_jlong_arraycopy =
  10.171 @@ -2051,20 +2064,20 @@
  10.172      StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
  10.173      StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
  10.174  
  10.175 -    StubRoutines::_arrayof_jint_disjoint_arraycopy  =
  10.176 -        StubRoutines::_jint_disjoint_arraycopy;
  10.177 -    StubRoutines::_arrayof_oop_disjoint_arraycopy   =
  10.178 -        StubRoutines::_oop_disjoint_arraycopy;
  10.179 -    StubRoutines::_arrayof_jlong_disjoint_arraycopy =
  10.180 -        StubRoutines::_jlong_disjoint_arraycopy;
  10.181 +    StubRoutines::_arrayof_jint_disjoint_arraycopy       = StubRoutines::_jint_disjoint_arraycopy;
  10.182 +    StubRoutines::_arrayof_oop_disjoint_arraycopy        = StubRoutines::_oop_disjoint_arraycopy;
  10.183 +    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
  10.184 +    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = StubRoutines::_jlong_disjoint_arraycopy;
  10.185  
  10.186 -    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
  10.187 -    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
  10.188 -    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
  10.189 +    StubRoutines::_arrayof_jint_arraycopy       = StubRoutines::_jint_arraycopy;
  10.190 +    StubRoutines::_arrayof_oop_arraycopy        = StubRoutines::_oop_arraycopy;
  10.191 +    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
  10.192 +    StubRoutines::_arrayof_jlong_arraycopy      = StubRoutines::_jlong_arraycopy;
  10.193  
  10.194      StubRoutines::_checkcast_arraycopy =
  10.195 -        generate_checkcast_copy("checkcast_arraycopy",
  10.196 -                                  &entry_checkcast_arraycopy);
  10.197 +        generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
  10.198 +    StubRoutines::_checkcast_arraycopy_uninit =
  10.199 +        generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
  10.200  
  10.201      StubRoutines::_unsafe_arraycopy =
  10.202          generate_unsafe_copy("unsafe_arraycopy",
    11.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 03 21:02:56 2011 -0800
    11.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 03 23:31:45 2011 -0800
    11.3 @@ -1,5 +1,5 @@
    11.4  /*
    11.5 - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
    11.6 + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
    11.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    11.8   *
    11.9   * This code is free software; you can redistribute it and/or modify it
   11.10 @@ -1057,20 +1057,6 @@
   11.11      return start;
   11.12    }
   11.13  
   11.14 -  static address disjoint_byte_copy_entry;
   11.15 -  static address disjoint_short_copy_entry;
   11.16 -  static address disjoint_int_copy_entry;
   11.17 -  static address disjoint_long_copy_entry;
   11.18 -  static address disjoint_oop_copy_entry;
   11.19 -
   11.20 -  static address byte_copy_entry;
   11.21 -  static address short_copy_entry;
   11.22 -  static address int_copy_entry;
   11.23 -  static address long_copy_entry;
   11.24 -  static address oop_copy_entry;
   11.25 -
   11.26 -  static address checkcast_copy_entry;
   11.27 -
   11.28    //
   11.29    // Verify that a register contains clean 32-bits positive value
   11.30    // (high 32-bits are 0) so it could be used in 64-bits shifts.
   11.31 @@ -1173,34 +1159,35 @@
   11.32    // Generate code for an array write pre barrier
   11.33    //
   11.34    //     addr    -  starting address
   11.35 -  //     count    -  element count
   11.36 +  //     count   -  element count
   11.37 +  //     tmp     - scratch register
   11.38    //
   11.39    //     Destroy no registers!
   11.40    //
   11.41 -  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
   11.42 +  void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
   11.43      BarrierSet* bs = Universe::heap()->barrier_set();
   11.44      switch (bs->kind()) {
   11.45        case BarrierSet::G1SATBCT:
   11.46        case BarrierSet::G1SATBCTLogging:
   11.47 -        {
   11.48 -          __ pusha();                      // push registers
   11.49 -          if (count == c_rarg0) {
   11.50 -            if (addr == c_rarg1) {
   11.51 -              // exactly backwards!!
   11.52 -              __ xchgptr(c_rarg1, c_rarg0);
   11.53 -            } else {
   11.54 -              __ movptr(c_rarg1, count);
   11.55 -              __ movptr(c_rarg0, addr);
   11.56 -            }
   11.57 -
   11.58 -          } else {
   11.59 -            __ movptr(c_rarg0, addr);
   11.60 -            __ movptr(c_rarg1, count);
   11.61 -          }
   11.62 -          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
   11.63 -          __ popa();
    11.64 +        // With G1, don't generate the call if we statically know that the target is uninitialized
   11.65 +        if (!dest_uninitialized) {
   11.66 +           __ pusha();                      // push registers
   11.67 +           if (count == c_rarg0) {
   11.68 +             if (addr == c_rarg1) {
   11.69 +               // exactly backwards!!
   11.70 +               __ xchgptr(c_rarg1, c_rarg0);
   11.71 +             } else {
   11.72 +               __ movptr(c_rarg1, count);
   11.73 +               __ movptr(c_rarg0, addr);
   11.74 +             }
   11.75 +           } else {
   11.76 +             __ movptr(c_rarg0, addr);
   11.77 +             __ movptr(c_rarg1, count);
   11.78 +           }
   11.79 +           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
   11.80 +           __ popa();
   11.81          }
   11.82 -        break;
   11.83 +         break;
   11.84        case BarrierSet::CardTableModRef:
   11.85        case BarrierSet::CardTableExtension:
   11.86        case BarrierSet::ModRef:
   11.87 @@ -1379,7 +1366,7 @@
   11.88    //   disjoint_byte_copy_entry is set to the no-overlap entry point
   11.89    //   used by generate_conjoint_byte_copy().
   11.90    //
   11.91 -  address generate_disjoint_byte_copy(bool aligned, const char *name) {
   11.92 +  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
   11.93      __ align(CodeEntryAlignment);
   11.94      StubCodeMark mark(this, "StubRoutines", name);
   11.95      address start = __ pc();
   11.96 @@ -1399,9 +1386,11 @@
   11.97      __ enter(); // required for proper stackwalking of RuntimeStub frame
   11.98      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
   11.99  
  11.100 -    disjoint_byte_copy_entry = __ pc();
  11.101 -    BLOCK_COMMENT("Entry:");
  11.102 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.103 +    if (entry != NULL) {
  11.104 +      *entry = __ pc();
  11.105 +       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.106 +      BLOCK_COMMENT("Entry:");
  11.107 +    }
  11.108  
  11.109      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.110                        // r9 and r10 may be used to save non-volatile registers
  11.111 @@ -1479,7 +1468,8 @@
  11.112    // dwords or qwords that span cache line boundaries will still be loaded
  11.113    // and stored atomically.
  11.114    //
  11.115 -  address generate_conjoint_byte_copy(bool aligned, const char *name) {
  11.116 +  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
  11.117 +                                      address* entry, const char *name) {
  11.118      __ align(CodeEntryAlignment);
  11.119      StubCodeMark mark(this, "StubRoutines", name);
  11.120      address start = __ pc();
  11.121 @@ -1494,11 +1484,13 @@
  11.122      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.123      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.124  
  11.125 -    byte_copy_entry = __ pc();
  11.126 -    BLOCK_COMMENT("Entry:");
  11.127 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.128 -
  11.129 -    array_overlap_test(disjoint_byte_copy_entry, Address::times_1);
  11.130 +    if (entry != NULL) {
  11.131 +      *entry = __ pc();
  11.132 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.133 +      BLOCK_COMMENT("Entry:");
  11.134 +    }
  11.135 +
  11.136 +    array_overlap_test(nooverlap_target, Address::times_1);
  11.137      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.138                        // r9 and r10 may be used to save non-volatile registers
  11.139  
  11.140 @@ -1574,7 +1566,7 @@
  11.141    //   disjoint_short_copy_entry is set to the no-overlap entry point
  11.142    //   used by generate_conjoint_short_copy().
  11.143    //
  11.144 -  address generate_disjoint_short_copy(bool aligned, const char *name) {
  11.145 +  address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
  11.146      __ align(CodeEntryAlignment);
  11.147      StubCodeMark mark(this, "StubRoutines", name);
  11.148      address start = __ pc();
  11.149 @@ -1593,9 +1585,11 @@
  11.150      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.151      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.152  
  11.153 -    disjoint_short_copy_entry = __ pc();
  11.154 -    BLOCK_COMMENT("Entry:");
  11.155 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.156 +    if (entry != NULL) {
  11.157 +      *entry = __ pc();
  11.158 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.159 +      BLOCK_COMMENT("Entry:");
  11.160 +    }
  11.161  
  11.162      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.163                        // r9 and r10 may be used to save non-volatile registers
  11.164 @@ -1686,7 +1680,8 @@
  11.165    // or qwords that span cache line boundaries will still be loaded
  11.166    // and stored atomically.
  11.167    //
  11.168 -  address generate_conjoint_short_copy(bool aligned, const char *name) {
  11.169 +  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
  11.170 +                                       address *entry, const char *name) {
  11.171      __ align(CodeEntryAlignment);
  11.172      StubCodeMark mark(this, "StubRoutines", name);
  11.173      address start = __ pc();
  11.174 @@ -1701,11 +1696,13 @@
  11.175      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.176      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.177  
  11.178 -    short_copy_entry = __ pc();
  11.179 -    BLOCK_COMMENT("Entry:");
  11.180 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.181 -
  11.182 -    array_overlap_test(disjoint_short_copy_entry, Address::times_2);
  11.183 +    if (entry != NULL) {
  11.184 +      *entry = __ pc();
  11.185 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.186 +      BLOCK_COMMENT("Entry:");
  11.187 +    }
  11.188 +
  11.189 +    array_overlap_test(nooverlap_target, Address::times_2);
  11.190      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.191                        // r9 and r10 may be used to save non-volatile registers
  11.192  
  11.193 @@ -1773,7 +1770,8 @@
  11.194    //   disjoint_int_copy_entry is set to the no-overlap entry point
  11.195    //   used by generate_conjoint_int_oop_copy().
  11.196    //
  11.197 -  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  11.198 +  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
  11.199 +                                         const char *name, bool dest_uninitialized = false) {
  11.200      __ align(CodeEntryAlignment);
  11.201      StubCodeMark mark(this, "StubRoutines", name);
  11.202      address start = __ pc();
  11.203 @@ -1793,21 +1791,17 @@
  11.204      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.205      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.206  
  11.207 -    (is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry) = __ pc();
  11.208 -
  11.209 -    if (is_oop) {
  11.210 -      // no registers are destroyed by this call
  11.211 -      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
  11.212 +    if (entry != NULL) {
  11.213 +      *entry = __ pc();
  11.214 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.215 +      BLOCK_COMMENT("Entry:");
  11.216      }
  11.217  
  11.218 -    BLOCK_COMMENT("Entry:");
  11.219 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.220 -
  11.221      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.222                        // r9 and r10 may be used to save non-volatile registers
  11.223 -
  11.224      if (is_oop) {
  11.225        __ movq(saved_to, to);
  11.226 +      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  11.227      }
  11.228  
  11.229      // 'from', 'to' and 'count' are now valid
  11.230 @@ -1867,7 +1861,9 @@
  11.231    // the hardware handle it.  The two dwords within qwords that span
  11.232    // cache line boundaries will still be loaded and stored atomicly.
  11.233    //
  11.234 -  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
  11.235 +  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
  11.236 +                                         address *entry, const char *name,
  11.237 +                                         bool dest_uninitialized = false) {
  11.238      __ align(CodeEntryAlignment);
  11.239      StubCodeMark mark(this, "StubRoutines", name);
  11.240      address start = __ pc();
  11.241 @@ -1882,20 +1878,21 @@
  11.242      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.243      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.244  
  11.245 +    if (entry != NULL) {
  11.246 +      *entry = __ pc();
  11.247 +       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.248 +      BLOCK_COMMENT("Entry:");
  11.249 +    }
  11.250 +
  11.251 +    array_overlap_test(nooverlap_target, Address::times_4);
  11.252 +    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.253 +                      // r9 and r10 may be used to save non-volatile registers
  11.254 +
  11.255      if (is_oop) {
  11.256        // no registers are destroyed by this call
  11.257 -      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
  11.258 +      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  11.259      }
  11.260  
  11.261 -    (is_oop ? oop_copy_entry : int_copy_entry) = __ pc();
  11.262 -    BLOCK_COMMENT("Entry:");
  11.263 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.264 -
  11.265 -    array_overlap_test(is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry,
  11.266 -                       Address::times_4);
  11.267 -    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.268 -                      // r9 and r10 may be used to save non-volatile registers
  11.269 -
  11.270      assert_clean_int(count, rax); // Make sure 'count' is clean int.
  11.271      // 'from', 'to' and 'count' are now valid
  11.272      __ movptr(dword_count, count);
  11.273 @@ -1959,7 +1956,8 @@
  11.274    //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
  11.275    //   no-overlap entry point used by generate_conjoint_long_oop_copy().
  11.276    //
  11.277 -  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  11.278 +  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
  11.279 +                                          const char *name, bool dest_uninitialized = false) {
  11.280      __ align(CodeEntryAlignment);
  11.281      StubCodeMark mark(this, "StubRoutines", name);
  11.282      address start = __ pc();
  11.283 @@ -1978,20 +1976,19 @@
  11.284      // Save no-overlap entry point for generate_conjoint_long_oop_copy()
  11.285      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.286  
  11.287 -    if (is_oop) {
  11.288 -      disjoint_oop_copy_entry  = __ pc();
  11.289 -      // no registers are destroyed by this call
  11.290 -      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
  11.291 -    } else {
  11.292 -      disjoint_long_copy_entry = __ pc();
  11.293 +    if (entry != NULL) {
  11.294 +      *entry = __ pc();
  11.295 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.296 +      BLOCK_COMMENT("Entry:");
  11.297      }
  11.298 -    BLOCK_COMMENT("Entry:");
  11.299 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.300  
  11.301      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.302                        // r9 and r10 may be used to save non-volatile registers
  11.303 -
  11.304      // 'from', 'to' and 'qword_count' are now valid
  11.305 +    if (is_oop) {
  11.306 +      // no registers are destroyed by this call
  11.307 +      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
  11.308 +    }
  11.309  
  11.310      // Copy from low to high addresses.  Use 'to' as scratch.
  11.311      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
  11.312 @@ -2045,7 +2042,9 @@
  11.313    //   c_rarg1   - destination array address
  11.314    //   c_rarg2   - element count, treated as ssize_t, can be zero
  11.315    //
  11.316 -  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
  11.317 +  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
  11.318 +                                          address nooverlap_target, address *entry,
  11.319 +                                          const char *name, bool dest_uninitialized = false) {
  11.320      __ align(CodeEntryAlignment);
  11.321      StubCodeMark mark(this, "StubRoutines", name);
  11.322      address start = __ pc();
  11.323 @@ -2059,31 +2058,21 @@
  11.324      __ enter(); // required for proper stackwalking of RuntimeStub frame
  11.325      assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
  11.326  
  11.327 -    address disjoint_copy_entry = NULL;
  11.328 -    if (is_oop) {
  11.329 -      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
  11.330 -      disjoint_copy_entry = disjoint_oop_copy_entry;
  11.331 -      oop_copy_entry  = __ pc();
  11.332 -      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
  11.333 -    } else {
  11.334 -      disjoint_copy_entry = disjoint_long_copy_entry;
  11.335 -      long_copy_entry = __ pc();
  11.336 -      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
  11.337 +    if (entry != NULL) {
  11.338 +      *entry = __ pc();
  11.339 +      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.340 +      BLOCK_COMMENT("Entry:");
  11.341      }
  11.342 -    BLOCK_COMMENT("Entry:");
  11.343 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
  11.344 -
  11.345 -    array_overlap_test(disjoint_copy_entry, Address::times_8);
  11.346 +
  11.347 +    array_overlap_test(nooverlap_target, Address::times_8);
  11.348      setup_arg_regs(); // from => rdi, to => rsi, count => rdx
  11.349                        // r9 and r10 may be used to save non-volatile registers
  11.350 -
  11.351      // 'from', 'to' and 'qword_count' are now valid
  11.352 -
  11.353      if (is_oop) {
  11.354        // Save to and count for store barrier
  11.355        __ movptr(saved_count, qword_count);
  11.356        // No registers are destroyed by this call
  11.357 -      gen_write_ref_array_pre_barrier(to, saved_count);
  11.358 +      gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
  11.359      }
  11.360  
  11.361      __ jmp(L_copy_32_bytes);
  11.362 @@ -2162,7 +2151,8 @@
  11.363    //    rax ==  0  -  success
  11.364    //    rax == -1^K - failure, where K is partial transfer count
  11.365    //
  11.366 -  address generate_checkcast_copy(const char *name) {
  11.367 +  address generate_checkcast_copy(const char *name, address *entry,
  11.368 +                                  bool dest_uninitialized = false) {
  11.369  
  11.370      Label L_load_element, L_store_element, L_do_card_marks, L_done;
  11.371  
  11.372 @@ -2216,8 +2206,10 @@
  11.373  #endif
  11.374  
  11.375      // Caller of this entry point must set up the argument registers.
  11.376 -    checkcast_copy_entry  = __ pc();
  11.377 -    BLOCK_COMMENT("Entry:");
  11.378 +    if (entry != NULL) {
  11.379 +      *entry = __ pc();
  11.380 +      BLOCK_COMMENT("Entry:");
  11.381 +    }
  11.382  
  11.383      // allocate spill slots for r13, r14
  11.384      enum {
  11.385 @@ -2254,7 +2246,7 @@
  11.386      Address from_element_addr(end_from, count, TIMES_OOP, 0);
  11.387      Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
  11.388  
  11.389 -    gen_write_ref_array_pre_barrier(to, count);
  11.390 +    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
  11.391  
  11.392      // Copy from low to high addresses, indexed from the end of each array.
  11.393      __ lea(end_from, end_from_addr);
  11.394 @@ -2334,7 +2326,9 @@
  11.395    // Examines the alignment of the operands and dispatches
  11.396    // to a long, int, short, or byte copy loop.
  11.397    //
  11.398 -  address generate_unsafe_copy(const char *name) {
  11.399 +  address generate_unsafe_copy(const char *name,
  11.400 +                               address byte_copy_entry, address short_copy_entry,
  11.401 +                               address int_copy_entry, address long_copy_entry) {
  11.402  
  11.403      Label L_long_aligned, L_int_aligned, L_short_aligned;
  11.404  
  11.405 @@ -2432,7 +2426,10 @@
  11.406    //    rax ==  0  -  success
  11.407    //    rax == -1^K - failure, where K is partial transfer count
  11.408    //
  11.409 -  address generate_generic_copy(const char *name) {
  11.410 +  address generate_generic_copy(const char *name,
  11.411 +                                address byte_copy_entry, address short_copy_entry,
  11.412 +                                address int_copy_entry, address long_copy_entry,
  11.413 +                                address oop_copy_entry, address checkcast_copy_entry) {
  11.414  
  11.415      Label L_failed, L_failed_0, L_objArray;
  11.416      Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
  11.417 @@ -2725,33 +2722,75 @@
  11.418    }
  11.419  
  11.420    void generate_arraycopy_stubs() {
  11.421 -    // Call the conjoint generation methods immediately after
  11.422 -    // the disjoint ones so that short branches from the former
  11.423 -    // to the latter can be generated.
  11.424 -    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
  11.425 -    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
  11.426 -
  11.427 -    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
  11.428 -    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
  11.429 -
  11.430 -    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
  11.431 -    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
  11.432 -
  11.433 -    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
  11.434 -    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
  11.435 +    address entry;
  11.436 +    address entry_jbyte_arraycopy;
  11.437 +    address entry_jshort_arraycopy;
  11.438 +    address entry_jint_arraycopy;
  11.439 +    address entry_oop_arraycopy;
  11.440 +    address entry_jlong_arraycopy;
  11.441 +    address entry_checkcast_arraycopy;
  11.442 +
  11.443 +    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, &entry,
  11.444 +                                                                           "jbyte_disjoint_arraycopy");
  11.445 +    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
  11.446 +                                                                           "jbyte_arraycopy");
  11.447 +
  11.448 +    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
  11.449 +                                                                            "jshort_disjoint_arraycopy");
  11.450 +    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
  11.451 +                                                                            "jshort_arraycopy");
  11.452 +
  11.453 +    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, &entry,
  11.454 +                                                                              "jint_disjoint_arraycopy");
  11.455 +    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, entry,
  11.456 +                                                                              &entry_jint_arraycopy, "jint_arraycopy");
  11.457 +
  11.458 +    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, &entry,
  11.459 +                                                                               "jlong_disjoint_arraycopy");
  11.460 +    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, entry,
  11.461 +                                                                               &entry_jlong_arraycopy, "jlong_arraycopy");
  11.462  
  11.463  
  11.464      if (UseCompressedOops) {
  11.465 -      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
  11.466 -      StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
  11.467 +      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, &entry,
  11.468 +                                                                              "oop_disjoint_arraycopy");
  11.469 +      StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
  11.470 +                                                                              &entry_oop_arraycopy, "oop_arraycopy");
  11.471 +      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_int_oop_copy(false, true, &entry,
  11.472 +                                                                                     "oop_disjoint_arraycopy_uninit",
  11.473 +                                                                                     /*dest_uninitialized*/true);
  11.474 +      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_int_oop_copy(false, true, entry,
  11.475 +                                                                                     NULL, "oop_arraycopy_uninit",
  11.476 +                                                                                     /*dest_uninitialized*/true);
  11.477      } else {
  11.478 -      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
  11.479 -      StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
  11.480 +      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
  11.481 +                                                                               "oop_disjoint_arraycopy");
  11.482 +      StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
  11.483 +                                                                               &entry_oop_arraycopy, "oop_arraycopy");
  11.484 +      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_long_oop_copy(false, true, &entry,
  11.485 +                                                                                      "oop_disjoint_arraycopy_uninit",
  11.486 +                                                                                      /*dest_uninitialized*/true);
  11.487 +      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_long_oop_copy(false, true, entry,
  11.488 +                                                                                      NULL, "oop_arraycopy_uninit",
  11.489 +                                                                                      /*dest_uninitialized*/true);
  11.490      }
  11.491  
  11.492 -    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
  11.493 -    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
  11.494 -    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
  11.495 +    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
  11.496 +    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
  11.497 +                                                                        /*dest_uninitialized*/true);
  11.498 +
  11.499 +    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
  11.500 +                                                              entry_jbyte_arraycopy,
  11.501 +                                                              entry_jshort_arraycopy,
  11.502 +                                                              entry_jint_arraycopy,
  11.503 +                                                              entry_jlong_arraycopy);
  11.504 +    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
  11.505 +                                                               entry_jbyte_arraycopy,
  11.506 +                                                               entry_jshort_arraycopy,
  11.507 +                                                               entry_jint_arraycopy,
  11.508 +                                                               entry_oop_arraycopy,
  11.509 +                                                               entry_jlong_arraycopy,
  11.510 +                                                               entry_checkcast_arraycopy);
  11.511  
  11.512      StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
  11.513      StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
  11.514 @@ -2776,6 +2815,9 @@
  11.515  
  11.516      StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
  11.517      StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
  11.518 +
  11.519 +    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit    = StubRoutines::_oop_disjoint_arraycopy_uninit;
  11.520 +    StubRoutines::_arrayof_oop_arraycopy_uninit             = StubRoutines::_oop_arraycopy_uninit;
  11.521    }
  11.522  
  11.523    void generate_math_stubs() {
  11.524 @@ -3069,20 +3111,6 @@
  11.525    }
  11.526  }; // end class declaration
  11.527  
  11.528 -address StubGenerator::disjoint_byte_copy_entry  = NULL;
  11.529 -address StubGenerator::disjoint_short_copy_entry = NULL;
  11.530 -address StubGenerator::disjoint_int_copy_entry   = NULL;
  11.531 -address StubGenerator::disjoint_long_copy_entry  = NULL;
  11.532 -address StubGenerator::disjoint_oop_copy_entry   = NULL;
  11.533 -
  11.534 -address StubGenerator::byte_copy_entry  = NULL;
  11.535 -address StubGenerator::short_copy_entry = NULL;
  11.536 -address StubGenerator::int_copy_entry   = NULL;
  11.537 -address StubGenerator::long_copy_entry  = NULL;
  11.538 -address StubGenerator::oop_copy_entry   = NULL;
  11.539 -
  11.540 -address StubGenerator::checkcast_copy_entry = NULL;
  11.541 -
  11.542  void StubGenerator_generate(CodeBuffer* code, bool all) {
  11.543    StubGenerator g(code, all);
  11.544  }
    12.1 --- a/src/cpu/x86/vm/x86_32.ad	Thu Mar 03 21:02:56 2011 -0800
    12.2 +++ b/src/cpu/x86/vm/x86_32.ad	Thu Mar 03 23:31:45 2011 -0800
    12.3 @@ -1,5 +1,5 @@
    12.4  //
    12.5 -// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    12.6 +// Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    12.7  // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    12.8  //
    12.9  // This code is free software; you can redistribute it and/or modify it
   12.10 @@ -12658,17 +12658,46 @@
   12.11    ins_pipe( pipe_slow );
   12.12  %}
   12.13  
   12.14 +// fast search of substring with known size.
   12.15 +instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
   12.16 +                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
   12.17 +  predicate(UseSSE42Intrinsics);
   12.18 +  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   12.19 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
   12.20 +
   12.21 +  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
   12.22 +  ins_encode %{
   12.23 +    int icnt2 = (int)$int_cnt2$$constant;
   12.24 +    if (icnt2 >= 8) {
   12.25 +      // IndexOf for constant substrings with size >= 8 elements
   12.26 +      // which don't need to be loaded through stack.
   12.27 +      __ string_indexofC8($str1$$Register, $str2$$Register,
   12.28 +                          $cnt1$$Register, $cnt2$$Register,
   12.29 +                          icnt2, $result$$Register,
   12.30 +                          $vec$$XMMRegister, $tmp$$Register);
   12.31 +    } else {
   12.32 +      // Small strings are loaded through stack if they cross page boundary.
   12.33 +      __ string_indexof($str1$$Register, $str2$$Register,
   12.34 +                        $cnt1$$Register, $cnt2$$Register,
   12.35 +                        icnt2, $result$$Register,
   12.36 +                        $vec$$XMMRegister, $tmp$$Register);
   12.37 +    }
   12.38 +  %}
   12.39 +  ins_pipe( pipe_slow );
   12.40 +%}
   12.41 +
   12.42  instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
   12.43 -                        eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
   12.44 +                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
   12.45    predicate(UseSSE42Intrinsics);
   12.46    match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   12.47 -  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
   12.48 -
   12.49 -  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp2, $tmp1" %}
   12.50 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
   12.51 +
   12.52 +  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   12.53    ins_encode %{
   12.54      __ string_indexof($str1$$Register, $str2$$Register,
   12.55 -                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
   12.56 -                      $tmp1$$XMMRegister, $tmp2$$Register);
   12.57 +                      $cnt1$$Register, $cnt2$$Register,
   12.58 +                      (-1), $result$$Register,
   12.59 +                      $vec$$XMMRegister, $tmp$$Register);
   12.60    %}
   12.61    ins_pipe( pipe_slow );
   12.62  %}
    13.1 --- a/src/cpu/x86/vm/x86_64.ad	Thu Mar 03 21:02:56 2011 -0800
    13.2 +++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 03 23:31:45 2011 -0800
    13.3 @@ -1,5 +1,5 @@
    13.4  //
    13.5 -// Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
    13.6 +// Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
    13.7  // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    13.8  //
    13.9  // This code is free software; you can redistribute it and/or modify it
   13.10 @@ -11598,18 +11598,48 @@
   13.11    ins_pipe( pipe_slow );
   13.12  %}
   13.13  
   13.14 +// fast search of substring with known size.
   13.15 +instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
   13.16 +                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
   13.17 +%{
   13.18 +  predicate(UseSSE42Intrinsics);
   13.19 +  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   13.20 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
   13.21 +
   13.22 +  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
   13.23 +  ins_encode %{
   13.24 +    int icnt2 = (int)$int_cnt2$$constant;
   13.25 +    if (icnt2 >= 8) {
   13.26 +      // IndexOf for constant substrings with size >= 8 elements
   13.27 +      // which don't need to be loaded through stack.
   13.28 +      __ string_indexofC8($str1$$Register, $str2$$Register,
   13.29 +                          $cnt1$$Register, $cnt2$$Register,
   13.30 +                          icnt2, $result$$Register,
   13.31 +                          $vec$$XMMRegister, $tmp$$Register);
   13.32 +    } else {
   13.33 +      // Small strings are loaded through stack if they cross page boundary.
   13.34 +      __ string_indexof($str1$$Register, $str2$$Register,
   13.35 +                        $cnt1$$Register, $cnt2$$Register,
   13.36 +                        icnt2, $result$$Register,
   13.37 +                        $vec$$XMMRegister, $tmp$$Register);
   13.38 +    }
   13.39 +  %}
   13.40 +  ins_pipe( pipe_slow );
   13.41 +%}
   13.42 +
   13.43  instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
   13.44 -                        rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
   13.45 +                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
   13.46  %{
   13.47    predicate(UseSSE42Intrinsics);
   13.48    match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   13.49 -  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
   13.50 -
   13.51 -  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
   13.52 +  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
   13.53 +
   13.54 +  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   13.55    ins_encode %{
   13.56      __ string_indexof($str1$$Register, $str2$$Register,
   13.57 -                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
   13.58 -                      $tmp1$$XMMRegister, $tmp2$$Register);
   13.59 +                      $cnt1$$Register, $cnt2$$Register,
   13.60 +                      (-1), $result$$Register,
   13.61 +                      $vec$$XMMRegister, $tmp$$Register);
   13.62    %}
   13.63    ins_pipe( pipe_slow );
   13.64  %}
    14.1 --- a/src/share/tools/hsdis/hsdis-demo.c	Thu Mar 03 21:02:56 2011 -0800
    14.2 +++ b/src/share/tools/hsdis/hsdis-demo.c	Thu Mar 03 23:31:45 2011 -0800
    14.3 @@ -22,8 +22,6 @@
    14.4   *
    14.5   */
    14.6  
    14.7 -#include "precompiled.hpp"
    14.8 -
    14.9  /* hsdis-demo.c -- dump a range of addresses as native instructions
   14.10     This demonstrates the protocol required by the HotSpot PrintAssembly option.
   14.11  */
    15.1 --- a/src/share/tools/hsdis/hsdis.c	Thu Mar 03 21:02:56 2011 -0800
    15.2 +++ b/src/share/tools/hsdis/hsdis.c	Thu Mar 03 23:31:45 2011 -0800
    15.3 @@ -22,8 +22,6 @@
    15.4   *
    15.5   */
    15.6  
    15.7 -#include "precompiled.hpp"
    15.8 -
    15.9  /* hsdis.c -- dump a range of addresses as native instructions
   15.10     This implements the plugin protocol required by the
   15.11     HotSpot PrintAssembly option.
    16.1 --- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Mar 03 21:02:56 2011 -0800
    16.2 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Mar 03 23:31:45 2011 -0800
    16.3 @@ -3308,22 +3308,23 @@
    16.4    Value exception = append_with_bci(new ExceptionObject(), SynchronizationEntryBCI);
    16.5    assert(exception->is_pinned(), "must be");
    16.6  
    16.7 +  int bci = SynchronizationEntryBCI;
    16.8    if (compilation()->env()->dtrace_method_probes()) {
    16.9 -    // Report exit from inline methods
   16.10 +    // Report exit from inline methods.  We don't have a stream here
   16.11 +    // so pass an explicit bci of SynchronizationEntryBCI.
   16.12      Values* args = new Values(1);
   16.13 -    args->push(append(new Constant(new ObjectConstant(method()))));
   16.14 -    append(new RuntimeCall(voidType, "dtrace_method_exit", CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), args));
   16.15 +    args->push(append_with_bci(new Constant(new ObjectConstant(method())), bci));
   16.16 +    append_with_bci(new RuntimeCall(voidType, "dtrace_method_exit", CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), args), bci);
   16.17    }
   16.18  
   16.19 -  int bci = SynchronizationEntryBCI;
   16.20    if (lock) {
   16.21      assert(state()->locks_size() > 0 && state()->lock_at(state()->locks_size() - 1) == lock, "lock is missing");
   16.22      if (!lock->is_linked()) {
   16.23 -      lock = append_with_bci(lock, -1);
   16.24 +      lock = append_with_bci(lock, bci);
   16.25      }
   16.26  
   16.27      // exit the monitor in the context of the synchronized method
   16.28 -    monitorexit(lock, SynchronizationEntryBCI);
   16.29 +    monitorexit(lock, bci);
   16.30  
   16.31      // exit the context of the synchronized method
   16.32      if (!default_handler) {
    17.1 --- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Mar 03 21:02:56 2011 -0800
    17.2 +++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Mar 03 23:31:45 2011 -0800
    17.3 @@ -426,10 +426,9 @@
    17.4  // been deoptimized. If that is the case we return the deopt blob
    17.5  // unpack_with_exception entry instead. This makes life for the exception blob easier
    17.6  // because making that same check and diverting is painful from assembly language.
    17.7 -//
    17.8 -
    17.9 -
   17.10  JRT_ENTRY_NO_ASYNC(static address, exception_handler_for_pc_helper(JavaThread* thread, oopDesc* ex, address pc, nmethod*& nm))
   17.11 +  // Reset method handle flag.
   17.12 +  thread->set_is_method_handle_return(false);
   17.13  
   17.14    Handle exception(thread, ex);
   17.15    nm = CodeCache::find_nmethod(pc);
   17.16 @@ -480,11 +479,12 @@
   17.17      return SharedRuntime::deopt_blob()->unpack_with_exception_in_tls();
   17.18    }
   17.19  
   17.20 -  // ExceptionCache is used only for exceptions at call and not for implicit exceptions
   17.21 +  // ExceptionCache is used only for exceptions at call sites and not for implicit exceptions
   17.22    if (guard_pages_enabled) {
   17.23      address fast_continuation = nm->handler_for_exception_and_pc(exception, pc);
   17.24      if (fast_continuation != NULL) {
   17.25 -      if (fast_continuation == ExceptionCache::unwind_handler()) fast_continuation = NULL;
   17.26 +      // Set flag if return address is a method handle call site.
   17.27 +      thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
   17.28        return fast_continuation;
   17.29      }
   17.30    }
   17.31 @@ -522,14 +522,14 @@
   17.32      thread->set_exception_pc(pc);
   17.33  
   17.34      // the exception cache is used only by non-implicit exceptions
   17.35 -    if (continuation == NULL) {
   17.36 -      nm->add_handler_for_exception_and_pc(exception, pc, ExceptionCache::unwind_handler());
   17.37 -    } else {
   17.38 +    if (continuation != NULL) {
   17.39        nm->add_handler_for_exception_and_pc(exception, pc, continuation);
   17.40      }
   17.41    }
   17.42  
   17.43    thread->set_vm_result(exception());
   17.44 +  // Set flag if return address is a method handle call site.
   17.45 +  thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
   17.46  
   17.47    if (TraceExceptions) {
   17.48      ttyLocker ttyl;
   17.49 @@ -542,20 +542,19 @@
   17.50  JRT_END
   17.51  
   17.52  // Enter this method from compiled code only if there is a Java exception handler
   17.53 -// in the method handling the exception
   17.54 +// in the method handling the exception.
   17.55  // We are entering here from exception stub. We don't do a normal VM transition here.
   17.56  // We do it in a helper. This is so we can check to see if the nmethod we have just
   17.57  // searched for an exception handler has been deoptimized in the meantime.
   17.58 -address  Runtime1::exception_handler_for_pc(JavaThread* thread) {
   17.59 +address Runtime1::exception_handler_for_pc(JavaThread* thread) {
   17.60    oop exception = thread->exception_oop();
   17.61    address pc = thread->exception_pc();
   17.62    // Still in Java mode
   17.63 -  debug_only(ResetNoHandleMark rnhm);
   17.64 +  DEBUG_ONLY(ResetNoHandleMark rnhm);
   17.65    nmethod* nm = NULL;
   17.66    address continuation = NULL;
   17.67    {
   17.68      // Enter VM mode by calling the helper
   17.69 -
   17.70      ResetNoHandleMark rnhm;
   17.71      continuation = exception_handler_for_pc_helper(thread, exception, pc, nm);
   17.72    }
   17.73 @@ -563,11 +562,11 @@
   17.74  
   17.75    // Now check to see if the nmethod we were called from is now deoptimized.
   17.76    // If so we must return to the deopt blob and deoptimize the nmethod
   17.77 -
   17.78    if (nm != NULL && caller_is_deopted()) {
   17.79      continuation = SharedRuntime::deopt_blob()->unpack_with_exception_in_tls();
   17.80    }
   17.81  
   17.82 +  assert(continuation != NULL, "no handler found");
   17.83    return continuation;
   17.84  }
   17.85  
    18.1 --- a/src/share/vm/c1/c1_Runtime1.hpp	Thu Mar 03 21:02:56 2011 -0800
    18.2 +++ b/src/share/vm/c1/c1_Runtime1.hpp	Thu Mar 03 23:31:45 2011 -0800
    18.3 @@ -54,6 +54,7 @@
    18.4    stub(new_multi_array)              \
    18.5    stub(handle_exception_nofpu)         /* optimized version that does not preserve fpu registers */ \
    18.6    stub(handle_exception)             \
    18.7 +  stub(handle_exception_from_callee) \
    18.8    stub(throw_array_store_exception)  \
    18.9    stub(throw_class_cast_exception)   \
   18.10    stub(throw_incompatible_class_change_error)   \
   18.11 @@ -116,11 +117,11 @@
   18.12    static const char* _blob_names[];
   18.13  
   18.14    // stub generation
   18.15 -  static void generate_blob_for(BufferBlob* blob, StubID id);
   18.16 -  static OopMapSet* generate_code_for(StubID id, StubAssembler* masm);
   18.17 +  static void       generate_blob_for(BufferBlob* blob, StubID id);
   18.18 +  static OopMapSet* generate_code_for(StubID id, StubAssembler* sasm);
   18.19    static OopMapSet* generate_exception_throw(StubAssembler* sasm, address target, bool has_argument);
   18.20 -  static void generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool ignore_fpu_registers = false);
   18.21 -  static void generate_unwind_exception(StubAssembler *sasm);
   18.22 +  static OopMapSet* generate_handle_exception(StubID id, StubAssembler* sasm);
   18.23 +  static void       generate_unwind_exception(StubAssembler *sasm);
   18.24    static OopMapSet* generate_patching(StubAssembler* sasm, address target);
   18.25  
   18.26    static OopMapSet* generate_stub_call(StubAssembler* sasm, Register result, address entry,
    19.1 --- a/src/share/vm/classfile/classLoader.cpp	Thu Mar 03 21:02:56 2011 -0800
    19.2 +++ b/src/share/vm/classfile/classLoader.cpp	Thu Mar 03 23:31:45 2011 -0800
    19.3 @@ -1332,7 +1332,7 @@
    19.4        }
    19.5  
    19.6        if (_compile_the_world_counter >= CompileTheWorldStartAt) {
    19.7 -        if (k.is_null() || (exception_occurred && !CompileTheWorldIgnoreInitErrors)) {
    19.8 +        if (k.is_null() || exception_occurred) {
    19.9            // If something went wrong (e.g. ExceptionInInitializerError) we skip this class
   19.10            tty->print_cr("CompileTheWorld (%d) : Skipping %s", _compile_the_world_counter, buffer);
   19.11          } else {
    20.1 --- a/src/share/vm/code/nmethod.cpp	Thu Mar 03 21:02:56 2011 -0800
    20.2 +++ b/src/share/vm/code/nmethod.cpp	Thu Mar 03 23:31:45 2011 -0800
    20.3 @@ -190,15 +190,10 @@
    20.4  } nmethod_stats;
    20.5  #endif //PRODUCT
    20.6  
    20.7 +
    20.8  //---------------------------------------------------------------------------------
    20.9  
   20.10  
   20.11 -// The _unwind_handler is a special marker address, which says that
   20.12 -// for given exception oop and address, the frame should be removed
   20.13 -// as the tuple cannot be caught in the nmethod
   20.14 -address ExceptionCache::_unwind_handler = (address) -1;
   20.15 -
   20.16 -
   20.17  ExceptionCache::ExceptionCache(Handle exception, address pc, address handler) {
   20.18    assert(pc != NULL, "Must be non null");
   20.19    assert(exception.not_null(), "Must be non null");
    21.1 --- a/src/share/vm/code/nmethod.hpp	Thu Mar 03 21:02:56 2011 -0800
    21.2 +++ b/src/share/vm/code/nmethod.hpp	Thu Mar 03 23:31:45 2011 -0800
    21.3 @@ -34,7 +34,6 @@
    21.4  class ExceptionCache : public CHeapObj {
    21.5    friend class VMStructs;
    21.6   private:
    21.7 -  static address _unwind_handler;
    21.8    enum { cache_size = 16 };
    21.9    klassOop _exception_type;
   21.10    address  _pc[cache_size];
   21.11 @@ -62,8 +61,6 @@
   21.12    bool    match_exception_with_space(Handle exception) ;
   21.13    address test_address(address addr);
   21.14    bool    add_address_and_handler(address addr, address handler) ;
   21.15 -
   21.16 -  static address unwind_handler() { return _unwind_handler; }
   21.17  };
   21.18  
   21.19  
    22.1 --- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Mar 03 21:02:56 2011 -0800
    22.2 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Mar 03 23:31:45 2011 -0800
    22.3 @@ -83,11 +83,15 @@
    22.4    }
    22.5  
    22.6    template <class T> void write_ref_array_pre_work(T* dst, int count);
    22.7 -  virtual void write_ref_array_pre(oop* dst, int count) {
    22.8 -    write_ref_array_pre_work(dst, count);
    22.9 +  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
   22.10 +    if (!dest_uninitialized) {
   22.11 +      write_ref_array_pre_work(dst, count);
   22.12 +    }
   22.13    }
   22.14 -  virtual void write_ref_array_pre(narrowOop* dst, int count) {
   22.15 -    write_ref_array_pre_work(dst, count);
   22.16 +  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
   22.17 +    if (!dest_uninitialized) {
   22.18 +      write_ref_array_pre_work(dst, count);
   22.19 +    }
   22.20    }
   22.21  };
   22.22  
    23.1 --- a/src/share/vm/memory/barrierSet.cpp	Thu Mar 03 21:02:56 2011 -0800
    23.2 +++ b/src/share/vm/memory/barrierSet.cpp	Thu Mar 03 23:31:45 2011 -0800
    23.3 @@ -35,9 +35,9 @@
    23.4                     start,            count);
    23.5  #endif
    23.6    if (UseCompressedOops) {
    23.7 -    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count);
    23.8 +    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count, false);
    23.9    } else {
   23.10 -    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count);
   23.11 +    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count, false);
   23.12    }
   23.13  }
   23.14  
    24.1 --- a/src/share/vm/memory/barrierSet.hpp	Thu Mar 03 21:02:56 2011 -0800
    24.2 +++ b/src/share/vm/memory/barrierSet.hpp	Thu Mar 03 23:31:45 2011 -0800
    24.3 @@ -1,5 +1,5 @@
    24.4  /*
    24.5 - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
    24.6 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
    24.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    24.8   *
    24.9   * This code is free software; you can redistribute it and/or modify it
   24.10 @@ -44,6 +44,10 @@
   24.11      Uninit
   24.12    };
   24.13  
   24.14 +  enum Flags {
   24.15 +    None                = 0,
   24.16 +    TargetUninitialized = 1
   24.17 +  };
   24.18  protected:
   24.19    int _max_covered_regions;
   24.20    Name _kind;
   24.21 @@ -128,8 +132,10 @@
   24.22    virtual void read_prim_array(MemRegion mr) = 0;
   24.23  
   24.24    // Below length is the # array elements being written
   24.25 -  virtual void write_ref_array_pre(      oop* dst, int length) {}
   24.26 -  virtual void write_ref_array_pre(narrowOop* dst, int length) {}
   24.27 +  virtual void write_ref_array_pre(oop* dst, int length,
   24.28 +                                   bool dest_uninitialized = false) {}
   24.29 +  virtual void write_ref_array_pre(narrowOop* dst, int length,
   24.30 +                                   bool dest_uninitialized = false) {}
   24.31    // Below count is the # array elements being written, starting
   24.32    // at the address "start", which may not necessarily be HeapWord-aligned
   24.33    inline void write_ref_array(HeapWord* start, size_t count);
    25.1 --- a/src/share/vm/opto/library_call.cpp	Thu Mar 03 21:02:56 2011 -0800
    25.2 +++ b/src/share/vm/opto/library_call.cpp	Thu Mar 03 23:31:45 2011 -0800
    25.3 @@ -1,5 +1,5 @@
    25.4  /*
    25.5 - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
    25.6 + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
    25.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    25.8   *
    25.9   * This code is free software; you can redistribute it and/or modify it
   25.10 @@ -97,7 +97,7 @@
   25.11                               RegionNode* region);
   25.12    Node* generate_current_thread(Node* &tls_output);
   25.13    address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
   25.14 -                              bool disjoint_bases, const char* &name);
   25.15 +                              bool disjoint_bases, const char* &name, bool dest_uninitialized);
   25.16    Node* load_mirror_from_klass(Node* klass);
   25.17    Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
   25.18                                        int nargs,
   25.19 @@ -212,26 +212,26 @@
   25.20                                  AllocateNode* alloc,
   25.21                                  Node* src,  Node* src_offset,
   25.22                                  Node* dest, Node* dest_offset,
   25.23 -                                Node* dest_size);
   25.24 +                                Node* dest_size, bool dest_uninitialized);
   25.25    void generate_slow_arraycopy(const TypePtr* adr_type,
   25.26                                 Node* src,  Node* src_offset,
   25.27                                 Node* dest, Node* dest_offset,
   25.28 -                               Node* copy_length);
   25.29 +                               Node* copy_length, bool dest_uninitialized);
   25.30    Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
   25.31                                       Node* dest_elem_klass,
   25.32                                       Node* src,  Node* src_offset,
   25.33                                       Node* dest, Node* dest_offset,
   25.34 -                                     Node* copy_length);
   25.35 +                                     Node* copy_length, bool dest_uninitialized);
   25.36    Node* generate_generic_arraycopy(const TypePtr* adr_type,
   25.37                                     Node* src,  Node* src_offset,
   25.38                                     Node* dest, Node* dest_offset,
   25.39 -                                   Node* copy_length);
   25.40 +                                   Node* copy_length, bool dest_uninitialized);
   25.41    void generate_unchecked_arraycopy(const TypePtr* adr_type,
   25.42                                      BasicType basic_elem_type,
   25.43                                      bool disjoint_bases,
   25.44                                      Node* src,  Node* src_offset,
   25.45                                      Node* dest, Node* dest_offset,
   25.46 -                                    Node* copy_length);
   25.47 +                                    Node* copy_length, bool dest_uninitialized);
   25.48    bool inline_unsafe_CAS(BasicType type);
   25.49    bool inline_unsafe_ordered_store(BasicType type);
   25.50    bool inline_fp_conversions(vmIntrinsics::ID id);
   25.51 @@ -1193,7 +1193,7 @@
   25.52    Node* result;
   25.53    // Disable the use of pcmpestri until it can be guaranteed that
   25.54    // the load doesn't cross into the uncommited space.
   25.55 -  if (false && Matcher::has_match_rule(Op_StrIndexOf) &&
   25.56 +  if (Matcher::has_match_rule(Op_StrIndexOf) &&
   25.57        UseSSE42Intrinsics) {
   25.58      // Generate SSE4.2 version of indexOf
   25.59      // We currently only have match rules that use SSE4.2
   25.60 @@ -1211,14 +1211,14 @@
   25.61        return true;
   25.62      }
   25.63  
   25.64 +    ciInstanceKlass* str_klass = env()->String_klass();
   25.65 +    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(str_klass);
   25.66 +
   25.67      // Make the merge point
   25.68 -    RegionNode* result_rgn = new (C, 3) RegionNode(3);
   25.69 -    Node*       result_phi = new (C, 3) PhiNode(result_rgn, TypeInt::INT);
   25.70 +    RegionNode* result_rgn = new (C, 4) RegionNode(4);
   25.71 +    Node*       result_phi = new (C, 4) PhiNode(result_rgn, TypeInt::INT);
   25.72      Node* no_ctrl  = NULL;
   25.73  
   25.74 -    ciInstanceKlass* klass = env()->String_klass();
   25.75 -    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(klass);
   25.76 -
   25.77      // Get counts for string and substr
   25.78      Node* source_cnta = basic_plus_adr(receiver, receiver, count_offset);
   25.79      Node* source_cnt  = make_load(no_ctrl, source_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
   25.80 @@ -1236,6 +1236,17 @@
   25.81      }
   25.82  
   25.83      if (!stopped()) {
   25.84 +      // Check for substr count == 0
   25.85 +      cmp = _gvn.transform( new(C, 3) CmpINode(substr_cnt, intcon(0)) );
   25.86 +      bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
   25.87 +      Node* if_zero = generate_slow_guard(bol, NULL);
   25.88 +      if (if_zero != NULL) {
   25.89 +        result_phi->init_req(3, intcon(0));
   25.90 +        result_rgn->init_req(3, if_zero);
   25.91 +      }
   25.92 +    }
   25.93 +
   25.94 +    if (!stopped()) {
   25.95        result = make_string_method_node(Op_StrIndexOf, receiver, source_cnt, argument, substr_cnt);
   25.96        result_phi->init_req(1, result);
   25.97        result_rgn->init_req(1, control());
   25.98 @@ -1244,8 +1255,8 @@
   25.99      record_for_igvn(result_rgn);
  25.100      result = _gvn.transform(result_phi);
  25.101  
  25.102 -  } else { //Use LibraryCallKit::string_indexOf
  25.103 -    // don't intrinsify is argument isn't a constant string.
  25.104 +  } else { // Use LibraryCallKit::string_indexOf
  25.105 +    // don't intrinsify if argument isn't a constant string.
  25.106      if (!argument->is_Con()) {
  25.107       return false;
  25.108      }
  25.109 @@ -1281,7 +1292,7 @@
  25.110      // No null check on the argument is needed since it's a constant String oop.
  25.111      _sp -= 2;
  25.112      if (stopped()) {
  25.113 -     return true;
  25.114 +      return true;
  25.115      }
  25.116  
  25.117      // The null string as a pattern always returns 0 (match at beginning of string)
  25.118 @@ -4081,7 +4092,8 @@
  25.119    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  25.120    bool disjoint_bases = true;
  25.121    generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
  25.122 -                               src, NULL, dest, NULL, countx);
  25.123 +                               src, NULL, dest, NULL, countx,
  25.124 +                               /*dest_uninitialized*/true);
  25.125  
  25.126    // If necessary, emit some card marks afterwards.  (Non-arrays only.)
  25.127    if (card_mark) {
  25.128 @@ -4295,7 +4307,7 @@
  25.129  // Note:  The condition "disjoint" applies also for overlapping copies
  25.130  // where an descending copy is permitted (i.e., dest_offset <= src_offset).
  25.131  static address
  25.132 -select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
  25.133 +select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name, bool dest_uninitialized) {
  25.134    int selector =
  25.135      (aligned  ? COPYFUNC_ALIGNED  : COPYFUNC_UNALIGNED) +
  25.136      (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
  25.137 @@ -4304,6 +4316,10 @@
  25.138    name = #xxx_arraycopy; \
  25.139    return StubRoutines::xxx_arraycopy(); }
  25.140  
  25.141 +#define RETURN_STUB_PARM(xxx_arraycopy, parm) {           \
  25.142 +  name = #xxx_arraycopy; \
  25.143 +  return StubRoutines::xxx_arraycopy(parm); }
  25.144 +
  25.145    switch (t) {
  25.146    case T_BYTE:
  25.147    case T_BOOLEAN:
  25.148 @@ -4340,10 +4356,10 @@
  25.149    case T_ARRAY:
  25.150    case T_OBJECT:
  25.151      switch (selector) {
  25.152 -    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_arraycopy);
  25.153 -    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_arraycopy);
  25.154 -    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_disjoint_arraycopy);
  25.155 -    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_disjoint_arraycopy);
  25.156 +    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_arraycopy, dest_uninitialized);
  25.157 +    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_arraycopy, dest_uninitialized);
  25.158 +    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_disjoint_arraycopy, dest_uninitialized);
  25.159 +    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_disjoint_arraycopy, dest_uninitialized);
  25.160      }
  25.161    default:
  25.162      ShouldNotReachHere();
  25.163 @@ -4351,6 +4367,7 @@
  25.164    }
  25.165  
  25.166  #undef RETURN_STUB
  25.167 +#undef RETURN_STUB_PARM
  25.168  }
  25.169  
  25.170  //------------------------------basictype2arraycopy----------------------------
  25.171 @@ -4358,7 +4375,8 @@
  25.172                                              Node* src_offset,
  25.173                                              Node* dest_offset,
  25.174                                              bool disjoint_bases,
  25.175 -                                            const char* &name) {
  25.176 +                                            const char* &name,
  25.177 +                                            bool dest_uninitialized) {
  25.178    const TypeInt* src_offset_inttype  = gvn().find_int_type(src_offset);;
  25.179    const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);;
  25.180  
  25.181 @@ -4384,7 +4402,7 @@
  25.182      disjoint = true;
  25.183    }
  25.184  
  25.185 -  return select_arraycopy_function(t, aligned, disjoint, name);
  25.186 +  return select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
  25.187  }
  25.188  
  25.189  
  25.190 @@ -4440,7 +4458,8 @@
  25.191      // The component types are not the same or are not recognized.  Punt.
  25.192      // (But, avoid the native method wrapper to JVM_ArrayCopy.)
  25.193      generate_slow_arraycopy(TypePtr::BOTTOM,
  25.194 -                            src, src_offset, dest, dest_offset, length);
  25.195 +                            src, src_offset, dest, dest_offset, length,
  25.196 +                            /*dest_uninitialized*/false);
  25.197      return true;
  25.198    }
  25.199  
  25.200 @@ -4553,7 +4572,7 @@
  25.201  
  25.202    Node* original_dest      = dest;
  25.203    AllocateArrayNode* alloc = NULL;  // used for zeroing, if needed
  25.204 -  bool  must_clear_dest    = false;
  25.205 +  bool  dest_uninitialized = false;
  25.206  
  25.207    // See if this is the initialization of a newly-allocated array.
  25.208    // If so, we will take responsibility here for initializing it to zero.
  25.209 @@ -4576,12 +4595,14 @@
  25.210      adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
  25.211      // From this point on, every exit path is responsible for
  25.212      // initializing any non-copied parts of the object to zero.
  25.213 -    must_clear_dest = true;
  25.214 +    // Also, if this flag is set we make sure that arraycopy interacts properly
  25.215 +    // with G1, eliding pre-barriers. See CR 6627983.
  25.216 +    dest_uninitialized = true;
  25.217    } else {
  25.218      // No zeroing elimination here.
  25.219      alloc             = NULL;
  25.220      //original_dest   = dest;
  25.221 -    //must_clear_dest = false;
  25.222 +    //dest_uninitialized = false;
  25.223    }
  25.224  
  25.225    // Results are placed here:
  25.226 @@ -4613,10 +4634,10 @@
  25.227    Node* checked_value   = NULL;
  25.228  
  25.229    if (basic_elem_type == T_CONFLICT) {
  25.230 -    assert(!must_clear_dest, "");
  25.231 +    assert(!dest_uninitialized, "");
  25.232      Node* cv = generate_generic_arraycopy(adr_type,
  25.233                                            src, src_offset, dest, dest_offset,
  25.234 -                                          copy_length);
  25.235 +                                          copy_length, dest_uninitialized);
  25.236      if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
  25.237      checked_control = control();
  25.238      checked_i_o     = i_o();
  25.239 @@ -4636,7 +4657,7 @@
  25.240      }
  25.241  
  25.242      // copy_length is 0.
  25.243 -    if (!stopped() && must_clear_dest) {
  25.244 +    if (!stopped() && dest_uninitialized) {
  25.245        Node* dest_length = alloc->in(AllocateNode::ALength);
  25.246        if (_gvn.eqv_uncast(copy_length, dest_length)
  25.247            || _gvn.find_int_con(dest_length, 1) <= 0) {
  25.248 @@ -4662,7 +4683,7 @@
  25.249      result_memory->init_req(zero_path, memory(adr_type));
  25.250    }
  25.251  
  25.252 -  if (!stopped() && must_clear_dest) {
  25.253 +  if (!stopped() && dest_uninitialized) {
  25.254      // We have to initialize the *uncopied* part of the array to zero.
  25.255      // The copy destination is the slice dest[off..off+len].  The other slices
  25.256      // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
  25.257 @@ -4698,7 +4719,7 @@
  25.258        { PreserveJVMState pjvms(this);
  25.259          didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
  25.260                                           src, src_offset, dest, dest_offset,
  25.261 -                                         dest_size);
  25.262 +                                         dest_size, dest_uninitialized);
  25.263          if (didit) {
  25.264            // Present the results of the block-copying fast call.
  25.265            result_region->init_req(bcopy_path, control());
  25.266 @@ -4774,7 +4795,7 @@
  25.267        Node* cv = generate_checkcast_arraycopy(adr_type,
  25.268                                                dest_elem_klass,
  25.269                                                src, src_offset, dest, dest_offset,
  25.270 -                                              ConvI2X(copy_length));
  25.271 +                                              ConvI2X(copy_length), dest_uninitialized);
  25.272        if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
  25.273        checked_control = control();
  25.274        checked_i_o     = i_o();
  25.275 @@ -4797,7 +4818,7 @@
  25.276      PreserveJVMState pjvms(this);
  25.277      generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
  25.278                                   src, src_offset, dest, dest_offset,
  25.279 -                                 ConvI2X(copy_length));
  25.280 +                                 ConvI2X(copy_length), dest_uninitialized);
  25.281  
  25.282      // Present the results of the fast call.
  25.283      result_region->init_req(fast_path, control());
  25.284 @@ -4876,7 +4897,7 @@
  25.285      set_memory(slow_mem, adr_type);
  25.286      set_i_o(slow_i_o);
  25.287  
  25.288 -    if (must_clear_dest) {
  25.289 +    if (dest_uninitialized) {
  25.290        generate_clear_array(adr_type, dest, basic_elem_type,
  25.291                             intcon(0), NULL,
  25.292                             alloc->in(AllocateNode::AllocSize));
  25.293 @@ -4884,7 +4905,7 @@
  25.294  
  25.295      generate_slow_arraycopy(adr_type,
  25.296                              src, src_offset, dest, dest_offset,
  25.297 -                            copy_length);
  25.298 +                            copy_length, /*dest_uninitialized*/false);
  25.299  
  25.300      result_region->init_req(slow_call_path, control());
  25.301      result_i_o   ->init_req(slow_call_path, i_o());
  25.302 @@ -5128,7 +5149,7 @@
  25.303                                           AllocateNode* alloc,
  25.304                                           Node* src,  Node* src_offset,
  25.305                                           Node* dest, Node* dest_offset,
  25.306 -                                         Node* dest_size) {
  25.307 +                                         Node* dest_size, bool dest_uninitialized) {
  25.308    // See if there is an advantage from block transfer.
  25.309    int scale = exact_log2(type2aelembytes(basic_elem_type));
  25.310    if (scale >= LogBytesPerLong)
  25.311 @@ -5173,7 +5194,7 @@
  25.312  
  25.313    bool disjoint_bases = true;   // since alloc != NULL
  25.314    generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
  25.315 -                               sptr, NULL, dptr, NULL, countx);
  25.316 +                               sptr, NULL, dptr, NULL, countx, dest_uninitialized);
  25.317  
  25.318    return true;
  25.319  }
  25.320 @@ -5186,7 +5207,8 @@
  25.321  LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
  25.322                                          Node* src,  Node* src_offset,
  25.323                                          Node* dest, Node* dest_offset,
  25.324 -                                        Node* copy_length) {
  25.325 +                                        Node* copy_length, bool dest_uninitialized) {
  25.326 +  assert(!dest_uninitialized, "Invariant");
  25.327    Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
  25.328                                   OptoRuntime::slow_arraycopy_Type(),
  25.329                                   OptoRuntime::slow_arraycopy_Java(),
  25.330 @@ -5204,10 +5226,10 @@
  25.331                                               Node* dest_elem_klass,
  25.332                                               Node* src,  Node* src_offset,
  25.333                                               Node* dest, Node* dest_offset,
  25.334 -                                             Node* copy_length) {
  25.335 +                                             Node* copy_length, bool dest_uninitialized) {
  25.336    if (stopped())  return NULL;
  25.337  
  25.338 -  address copyfunc_addr = StubRoutines::checkcast_arraycopy();
  25.339 +  address copyfunc_addr = StubRoutines::checkcast_arraycopy(dest_uninitialized);
  25.340    if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
  25.341      return NULL;
  25.342    }
  25.343 @@ -5245,9 +5267,9 @@
  25.344  LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
  25.345                                             Node* src,  Node* src_offset,
  25.346                                             Node* dest, Node* dest_offset,
  25.347 -                                           Node* copy_length) {
  25.348 +                                           Node* copy_length, bool dest_uninitialized) {
  25.349 +  assert(!dest_uninitialized, "Invariant");
  25.350    if (stopped())  return NULL;
  25.351 -
  25.352    address copyfunc_addr = StubRoutines::generic_arraycopy();
  25.353    if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
  25.354      return NULL;
  25.355 @@ -5268,7 +5290,7 @@
  25.356                                               bool disjoint_bases,
  25.357                                               Node* src,  Node* src_offset,
  25.358                                               Node* dest, Node* dest_offset,
  25.359 -                                             Node* copy_length) {
  25.360 +                                             Node* copy_length, bool dest_uninitialized) {
  25.361    if (stopped())  return;               // nothing to do
  25.362  
  25.363    Node* src_start  = src;
  25.364 @@ -5283,7 +5305,7 @@
  25.365    const char* copyfunc_name = "arraycopy";
  25.366    address     copyfunc_addr =
  25.367        basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
  25.368 -                          disjoint_bases, copyfunc_name);
  25.369 +                          disjoint_bases, copyfunc_name, dest_uninitialized);
  25.370  
  25.371    // Call it.  Note that the count_ix value is not scaled to a byte-size.
  25.372    make_runtime_call(RC_LEAF|RC_NO_FP,
    26.1 --- a/src/share/vm/opto/memnode.cpp	Thu Mar 03 21:02:56 2011 -0800
    26.2 +++ b/src/share/vm/opto/memnode.cpp	Thu Mar 03 23:31:45 2011 -0800
    26.3 @@ -1,5 +1,5 @@
    26.4  /*
    26.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    26.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    26.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    26.8   *
    26.9   * This code is free software; you can redistribute it and/or modify it
   26.10 @@ -1559,21 +1559,24 @@
   26.11              phase->C->has_unsafe_access(),
   26.12              "Field accesses must be precise" );
   26.13      // For oop loads, we expect the _type to be precise
   26.14 -    if (OptimizeStringConcat && klass == phase->C->env()->String_klass() &&
   26.15 +    if (klass == phase->C->env()->String_klass() &&
   26.16          adr->is_AddP() && off != Type::OffsetBot) {
   26.17 -      // For constant Strings treat the fields as compile time constants.
   26.18 +      // For constant Strings treat the final fields as compile time constants.
   26.19        Node* base = adr->in(AddPNode::Base);
   26.20        const TypeOopPtr* t = phase->type(base)->isa_oopptr();
   26.21        if (t != NULL && t->singleton()) {
   26.22 -        ciObject* string = t->const_oop();
   26.23 -        ciConstant constant = string->as_instance()->field_value_by_offset(off);
   26.24 -        if (constant.basic_type() == T_INT) {
   26.25 -          return TypeInt::make(constant.as_int());
   26.26 -        } else if (constant.basic_type() == T_ARRAY) {
   26.27 -          if (adr->bottom_type()->is_ptr_to_narrowoop()) {
   26.28 -            return TypeNarrowOop::make_from_constant(constant.as_object());
   26.29 -          } else {
   26.30 -            return TypeOopPtr::make_from_constant(constant.as_object());
   26.31 +        ciField* field = phase->C->env()->String_klass()->get_field_by_offset(off, false);
   26.32 +        if (field != NULL && field->is_final()) {
   26.33 +          ciObject* string = t->const_oop();
   26.34 +          ciConstant constant = string->as_instance()->field_value(field);
   26.35 +          if (constant.basic_type() == T_INT) {
   26.36 +            return TypeInt::make(constant.as_int());
   26.37 +          } else if (constant.basic_type() == T_ARRAY) {
   26.38 +            if (adr->bottom_type()->is_ptr_to_narrowoop()) {
   26.39 +              return TypeNarrowOop::make_from_constant(constant.as_object());
   26.40 +            } else {
   26.41 +              return TypeOopPtr::make_from_constant(constant.as_object());
   26.42 +            }
   26.43            }
   26.44          }
   26.45        }
   26.46 @@ -4077,6 +4080,7 @@
   26.47      n = base_memory();
   26.48      assert(Node::in_dump()
   26.49             || n == NULL || n->bottom_type() == Type::TOP
   26.50 +           || n->adr_type() == NULL // address is TOP
   26.51             || n->adr_type() == TypePtr::BOTTOM
   26.52             || n->adr_type() == TypeRawPtr::BOTTOM
   26.53             || Compile::current()->AliasLevel() == 0,
    27.1 --- a/src/share/vm/prims/unsafe.cpp	Thu Mar 03 21:02:56 2011 -0800
    27.2 +++ b/src/share/vm/prims/unsafe.cpp	Thu Mar 03 23:31:45 2011 -0800
    27.3 @@ -110,6 +110,8 @@
    27.4  
    27.5  inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
    27.6    jlong byte_offset = field_offset_to_byte_offset(field_offset);
    27.7 +  // Don't allow unsafe to be used to read or write the header word of oops
    27.8 +  assert(p == NULL || field_offset >= oopDesc::header_size(), "offset must be outside of header");
    27.9  #ifdef ASSERT
   27.10    if (p != NULL) {
   27.11      assert(byte_offset >= 0 && byte_offset <= (jlong)MAX_OBJECT_SIZE, "sane offset");
    28.1 --- a/src/share/vm/runtime/arguments.cpp	Thu Mar 03 21:02:56 2011 -0800
    28.2 +++ b/src/share/vm/runtime/arguments.cpp	Thu Mar 03 23:31:45 2011 -0800
    28.3 @@ -2798,10 +2798,6 @@
    28.4    if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
    28.5      FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
    28.6    }
    28.7 -  // Temporary disable bulk zeroing reduction with G1. See CR 6627983.
    28.8 -  if (UseG1GC) {
    28.9 -    FLAG_SET_DEFAULT(ReduceBulkZeroing, false);
   28.10 -  }
   28.11  #endif
   28.12  
   28.13    // If we are running in a headless jre, force java.awt.headless property
    29.1 --- a/src/share/vm/runtime/globals.hpp	Thu Mar 03 21:02:56 2011 -0800
    29.2 +++ b/src/share/vm/runtime/globals.hpp	Thu Mar 03 23:31:45 2011 -0800
    29.3 @@ -2610,9 +2610,6 @@
    29.4    develop(bool, CompileTheWorldPreloadClasses, true,                        \
    29.5            "Preload all classes used by a class before start loading")       \
    29.6                                                                              \
    29.7 -  notproduct(bool, CompileTheWorldIgnoreInitErrors, false,                  \
    29.8 -          "Compile all methods although class initializer failed")          \
    29.9 -                                                                            \
   29.10    notproduct(intx, CompileTheWorldSafepointInterval, 100,                   \
   29.11            "Force a safepoint every n compiles so sweeper can keep up")      \
   29.12                                                                              \
    30.1 --- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Mar 03 21:02:56 2011 -0800
    30.2 +++ b/src/share/vm/runtime/sharedRuntime.cpp	Thu Mar 03 23:31:45 2011 -0800
    30.3 @@ -431,25 +431,24 @@
    30.4  // previous frame depending on the return address.
    30.5  
    30.6  address SharedRuntime::raw_exception_handler_for_return_address(JavaThread* thread, address return_address) {
    30.7 -  assert(frame::verify_return_pc(return_address), "must be a return pc");
    30.8 -
    30.9 -  // Reset MethodHandle flag.
   30.10 +  assert(frame::verify_return_pc(return_address), err_msg("must be a return address: " INTPTR_FORMAT, return_address));
   30.11 +
   30.12 +  // Reset method handle flag.
   30.13    thread->set_is_method_handle_return(false);
   30.14  
   30.15 -  // the fastest case first
   30.16 +  // The fastest case first
   30.17    CodeBlob* blob = CodeCache::find_blob(return_address);
   30.18 -  if (blob != NULL && blob->is_nmethod()) {
   30.19 -    nmethod* code = (nmethod*)blob;
   30.20 -    assert(code != NULL, "nmethod must be present");
   30.21 -    // Check if the return address is a MethodHandle call site.
   30.22 -    thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
   30.23 +  nmethod* nm = (blob != NULL) ? blob->as_nmethod_or_null() : NULL;
   30.24 +  if (nm != NULL) {
   30.25 +    // Set flag if return address is a method handle call site.
   30.26 +    thread->set_is_method_handle_return(nm->is_method_handle_return(return_address));
   30.27      // native nmethods don't have exception handlers
   30.28 -    assert(!code->is_native_method(), "no exception handler");
   30.29 -    assert(code->header_begin() != code->exception_begin(), "no exception handler");
   30.30 -    if (code->is_deopt_pc(return_address)) {
   30.31 +    assert(!nm->is_native_method(), "no exception handler");
   30.32 +    assert(nm->header_begin() != nm->exception_begin(), "no exception handler");
   30.33 +    if (nm->is_deopt_pc(return_address)) {
   30.34        return SharedRuntime::deopt_blob()->unpack_with_exception();
   30.35      } else {
   30.36 -      return code->exception_begin();
   30.37 +      return nm->exception_begin();
   30.38      }
   30.39    }
   30.40  
   30.41 @@ -462,22 +461,9 @@
   30.42      return Interpreter::rethrow_exception_entry();
   30.43    }
   30.44  
   30.45 -  // Compiled code
   30.46 -  if (CodeCache::contains(return_address)) {
   30.47 -    CodeBlob* blob = CodeCache::find_blob(return_address);
   30.48 -    if (blob->is_nmethod()) {
   30.49 -      nmethod* code = (nmethod*)blob;
   30.50 -      assert(code != NULL, "nmethod must be present");
   30.51 -      // Check if the return address is a MethodHandle call site.
   30.52 -      thread->set_is_method_handle_return(code->is_method_handle_return(return_address));
   30.53 -      assert(code->header_begin() != code->exception_begin(), "no exception handler");
   30.54 -      return code->exception_begin();
   30.55 -    }
   30.56 -    if (blob->is_runtime_stub()) {
   30.57 -      ShouldNotReachHere();   // callers are responsible for skipping runtime stub frames
   30.58 -    }
   30.59 -  }
   30.60 +  guarantee(blob == NULL || !blob->is_runtime_stub(), "caller should have skipped stub");
   30.61    guarantee(!VtableStubs::contains(return_address), "NULL exceptions in vtables should have been handled already!");
   30.62 +
   30.63  #ifndef PRODUCT
   30.64    { ResourceMark rm;
   30.65      tty->print_cr("No exception handler found for exception at " INTPTR_FORMAT " - potential problems:", return_address);
   30.66 @@ -485,6 +471,7 @@
   30.67      tty->print_cr("b) other problem");
   30.68    }
   30.69  #endif // PRODUCT
   30.70 +
   30.71    ShouldNotReachHere();
   30.72    return NULL;
   30.73  }
    31.1 --- a/src/share/vm/runtime/stubRoutines.cpp	Thu Mar 03 21:02:56 2011 -0800
    31.2 +++ b/src/share/vm/runtime/stubRoutines.cpp	Thu Mar 03 23:31:45 2011 -0800
    31.3 @@ -1,5 +1,5 @@
    31.4  /*
    31.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    31.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    31.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    31.8   *
    31.9   * This code is free software; you can redistribute it and/or modify it
   31.10 @@ -80,30 +80,36 @@
   31.11  jint    StubRoutines::_fpu_subnormal_bias2[3]                   = { 0, 0, 0 };
   31.12  
   31.13  // Compiled code entry points default values
   31.14 -// The dafault functions don't have separate disjoint versions.
   31.15 +// The default functions don't have separate disjoint versions.
   31.16  address StubRoutines::_jbyte_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
   31.17  address StubRoutines::_jshort_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
   31.18  address StubRoutines::_jint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
   31.19  address StubRoutines::_jlong_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
   31.20  address StubRoutines::_oop_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
   31.21 +address StubRoutines::_oop_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
   31.22  address StubRoutines::_jbyte_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
   31.23  address StubRoutines::_jshort_disjoint_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
   31.24  address StubRoutines::_jint_disjoint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
   31.25  address StubRoutines::_jlong_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
   31.26  address StubRoutines::_oop_disjoint_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
   31.27 +address StubRoutines::_oop_disjoint_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
   31.28  
   31.29  address StubRoutines::_arrayof_jbyte_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
   31.30  address StubRoutines::_arrayof_jshort_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
   31.31  address StubRoutines::_arrayof_jint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
   31.32  address StubRoutines::_arrayof_jlong_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
   31.33  address StubRoutines::_arrayof_oop_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
   31.34 +address StubRoutines::_arrayof_oop_arraycopy_uninit      = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
   31.35  address StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
   31.36  address StubRoutines::_arrayof_jshort_disjoint_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
   31.37  address StubRoutines::_arrayof_jint_disjoint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
   31.38  address StubRoutines::_arrayof_jlong_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
   31.39 -address StubRoutines::_arrayof_oop_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
   31.40 +address StubRoutines::_arrayof_oop_disjoint_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
   31.41 +address StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
   31.42 +
   31.43  
   31.44  address StubRoutines::_checkcast_arraycopy               = NULL;
   31.45 +address StubRoutines::_checkcast_arraycopy_uninit        = NULL;
   31.46  address StubRoutines::_unsafe_arraycopy                  = NULL;
   31.47  address StubRoutines::_generic_arraycopy                 = NULL;
   31.48  
   31.49 @@ -282,12 +288,12 @@
   31.50  // Default versions of arraycopy functions
   31.51  //
   31.52  
   31.53 -static void gen_arraycopy_barrier_pre(oop* dest, size_t count) {
   31.54 +static void gen_arraycopy_barrier_pre(oop* dest, size_t count, bool dest_uninitialized) {
   31.55      assert(count != 0, "count should be non-zero");
   31.56      assert(count <= (size_t)max_intx, "count too large");
   31.57      BarrierSet* bs = Universe::heap()->barrier_set();
   31.58      assert(bs->has_write_ref_array_pre_opt(), "Must have pre-barrier opt");
   31.59 -    bs->write_ref_array_pre(dest, (int)count);
   31.60 +    bs->write_ref_array_pre(dest, (int)count, dest_uninitialized);
   31.61  }
   31.62  
   31.63  static void gen_arraycopy_barrier(oop* dest, size_t count) {
   31.64 @@ -330,7 +336,17 @@
   31.65    SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
   31.66  #endif // !PRODUCT
   31.67    assert(count != 0, "count should be non-zero");
   31.68 -  gen_arraycopy_barrier_pre(dest, count);
   31.69 +  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/false);
   31.70 +  Copy::conjoint_oops_atomic(src, dest, count);
   31.71 +  gen_arraycopy_barrier(dest, count);
   31.72 +JRT_END
   31.73 +
   31.74 +JRT_LEAF(void, StubRoutines::oop_copy_uninit(oop* src, oop* dest, size_t count))
   31.75 +#ifndef PRODUCT
   31.76 +  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
   31.77 +#endif // !PRODUCT
   31.78 +  assert(count != 0, "count should be non-zero");
   31.79 +  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/true);
   31.80    Copy::conjoint_oops_atomic(src, dest, count);
   31.81    gen_arraycopy_barrier(dest, count);
   31.82  JRT_END
   31.83 @@ -368,11 +384,20 @@
   31.84    SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
   31.85  #endif // !PRODUCT
   31.86    assert(count != 0, "count should be non-zero");
   31.87 -  gen_arraycopy_barrier_pre((oop *) dest, count);
   31.88 +  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/false);
   31.89    Copy::arrayof_conjoint_oops(src, dest, count);
   31.90    gen_arraycopy_barrier((oop *) dest, count);
   31.91  JRT_END
   31.92  
   31.93 +JRT_LEAF(void, StubRoutines::arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count))
   31.94 +#ifndef PRODUCT
   31.95 +  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
   31.96 +#endif // !PRODUCT
   31.97 +  assert(count != 0, "count should be non-zero");
   31.98 +  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/true);
   31.99 +  Copy::arrayof_conjoint_oops(src, dest, count);
  31.100 +  gen_arraycopy_barrier((oop *) dest, count);
  31.101 +JRT_END
  31.102  
  31.103  address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
  31.104  #define RETURN_STUB(xxx_fill) { \
    32.1 --- a/src/share/vm/runtime/stubRoutines.hpp	Thu Mar 03 21:02:56 2011 -0800
    32.2 +++ b/src/share/vm/runtime/stubRoutines.hpp	Thu Mar 03 23:31:45 2011 -0800
    32.3 @@ -1,5 +1,5 @@
    32.4  /*
    32.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    32.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    32.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    32.8   *
    32.9   * This code is free software; you can redistribute it and/or modify it
   32.10 @@ -164,12 +164,12 @@
   32.11    static address _jshort_arraycopy;
   32.12    static address _jint_arraycopy;
   32.13    static address _jlong_arraycopy;
   32.14 -  static address _oop_arraycopy;
   32.15 +  static address _oop_arraycopy, _oop_arraycopy_uninit;
   32.16    static address _jbyte_disjoint_arraycopy;
   32.17    static address _jshort_disjoint_arraycopy;
   32.18    static address _jint_disjoint_arraycopy;
   32.19    static address _jlong_disjoint_arraycopy;
   32.20 -  static address _oop_disjoint_arraycopy;
   32.21 +  static address _oop_disjoint_arraycopy, _oop_disjoint_arraycopy_uninit;
   32.22  
   32.23    // arraycopy operands aligned on zero'th element boundary
   32.24    // These are identical to the ones aligned aligned on an
   32.25 @@ -179,15 +179,15 @@
   32.26    static address _arrayof_jshort_arraycopy;
   32.27    static address _arrayof_jint_arraycopy;
   32.28    static address _arrayof_jlong_arraycopy;
   32.29 -  static address _arrayof_oop_arraycopy;
   32.30 +  static address _arrayof_oop_arraycopy, _arrayof_oop_arraycopy_uninit;
   32.31    static address _arrayof_jbyte_disjoint_arraycopy;
   32.32    static address _arrayof_jshort_disjoint_arraycopy;
   32.33    static address _arrayof_jint_disjoint_arraycopy;
   32.34    static address _arrayof_jlong_disjoint_arraycopy;
   32.35 -  static address _arrayof_oop_disjoint_arraycopy;
   32.36 +  static address _arrayof_oop_disjoint_arraycopy, _arrayof_oop_disjoint_arraycopy_uninit;
   32.37  
   32.38    // these are recommended but optional:
   32.39 -  static address _checkcast_arraycopy;
   32.40 +  static address _checkcast_arraycopy, _checkcast_arraycopy_uninit;
   32.41    static address _unsafe_arraycopy;
   32.42    static address _generic_arraycopy;
   32.43  
   32.44 @@ -286,26 +286,36 @@
   32.45    static address jshort_arraycopy() { return _jshort_arraycopy; }
   32.46    static address jint_arraycopy()   { return _jint_arraycopy; }
   32.47    static address jlong_arraycopy()  { return _jlong_arraycopy; }
   32.48 -  static address oop_arraycopy()    { return _oop_arraycopy; }
   32.49 +  static address oop_arraycopy(bool dest_uninitialized = false) {
   32.50 +    return dest_uninitialized ? _oop_arraycopy_uninit : _oop_arraycopy;
   32.51 +  }
   32.52    static address jbyte_disjoint_arraycopy()  { return _jbyte_disjoint_arraycopy; }
   32.53    static address jshort_disjoint_arraycopy() { return _jshort_disjoint_arraycopy; }
   32.54    static address jint_disjoint_arraycopy()   { return _jint_disjoint_arraycopy; }
   32.55    static address jlong_disjoint_arraycopy()  { return _jlong_disjoint_arraycopy; }
   32.56 -  static address oop_disjoint_arraycopy()    { return _oop_disjoint_arraycopy; }
   32.57 +  static address oop_disjoint_arraycopy(bool dest_uninitialized = false) {
   32.58 +    return dest_uninitialized ?  _oop_disjoint_arraycopy_uninit : _oop_disjoint_arraycopy;
   32.59 +  }
   32.60  
   32.61    static address arrayof_jbyte_arraycopy()  { return _arrayof_jbyte_arraycopy; }
   32.62    static address arrayof_jshort_arraycopy() { return _arrayof_jshort_arraycopy; }
   32.63    static address arrayof_jint_arraycopy()   { return _arrayof_jint_arraycopy; }
   32.64    static address arrayof_jlong_arraycopy()  { return _arrayof_jlong_arraycopy; }
   32.65 -  static address arrayof_oop_arraycopy()    { return _arrayof_oop_arraycopy; }
   32.66 +  static address arrayof_oop_arraycopy(bool dest_uninitialized = false) {
   32.67 +    return dest_uninitialized ? _arrayof_oop_arraycopy_uninit : _arrayof_oop_arraycopy;
   32.68 +  }
   32.69  
   32.70    static address arrayof_jbyte_disjoint_arraycopy()  { return _arrayof_jbyte_disjoint_arraycopy; }
   32.71    static address arrayof_jshort_disjoint_arraycopy() { return _arrayof_jshort_disjoint_arraycopy; }
   32.72    static address arrayof_jint_disjoint_arraycopy()   { return _arrayof_jint_disjoint_arraycopy; }
   32.73    static address arrayof_jlong_disjoint_arraycopy()  { return _arrayof_jlong_disjoint_arraycopy; }
   32.74 -  static address arrayof_oop_disjoint_arraycopy()    { return _arrayof_oop_disjoint_arraycopy; }
   32.75 +  static address arrayof_oop_disjoint_arraycopy(bool dest_uninitialized = false) {
   32.76 +    return dest_uninitialized ? _arrayof_oop_disjoint_arraycopy_uninit : _arrayof_oop_disjoint_arraycopy;
   32.77 +  }
   32.78  
   32.79 -  static address checkcast_arraycopy()     { return _checkcast_arraycopy; }
   32.80 +  static address checkcast_arraycopy(bool dest_uninitialized = false) {
   32.81 +    return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
   32.82 +  }
   32.83    static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
   32.84    static address generic_arraycopy()       { return _generic_arraycopy; }
   32.85  
   32.86 @@ -352,17 +362,19 @@
   32.87    // Default versions of the above arraycopy functions for platforms which do
   32.88    // not have specialized versions
   32.89    //
   32.90 -  static void jbyte_copy (jbyte*  src, jbyte*  dest, size_t count);
   32.91 -  static void jshort_copy(jshort* src, jshort* dest, size_t count);
   32.92 -  static void jint_copy  (jint*   src, jint*   dest, size_t count);
   32.93 -  static void jlong_copy (jlong*  src, jlong*  dest, size_t count);
   32.94 -  static void oop_copy   (oop*    src, oop*    dest, size_t count);
   32.95 +  static void jbyte_copy     (jbyte*  src, jbyte*  dest, size_t count);
   32.96 +  static void jshort_copy    (jshort* src, jshort* dest, size_t count);
   32.97 +  static void jint_copy      (jint*   src, jint*   dest, size_t count);
   32.98 +  static void jlong_copy     (jlong*  src, jlong*  dest, size_t count);
   32.99 +  static void oop_copy       (oop*    src, oop*    dest, size_t count);
  32.100 +  static void oop_copy_uninit(oop*    src, oop*    dest, size_t count);
  32.101  
  32.102 -  static void arrayof_jbyte_copy (HeapWord* src, HeapWord* dest, size_t count);
  32.103 -  static void arrayof_jshort_copy(HeapWord* src, HeapWord* dest, size_t count);
  32.104 -  static void arrayof_jint_copy  (HeapWord* src, HeapWord* dest, size_t count);
  32.105 -  static void arrayof_jlong_copy (HeapWord* src, HeapWord* dest, size_t count);
  32.106 -  static void arrayof_oop_copy   (HeapWord* src, HeapWord* dest, size_t count);
  32.107 +  static void arrayof_jbyte_copy     (HeapWord* src, HeapWord* dest, size_t count);
  32.108 +  static void arrayof_jshort_copy    (HeapWord* src, HeapWord* dest, size_t count);
  32.109 +  static void arrayof_jint_copy      (HeapWord* src, HeapWord* dest, size_t count);
  32.110 +  static void arrayof_jlong_copy     (HeapWord* src, HeapWord* dest, size_t count);
  32.111 +  static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
  32.112 +  static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
  32.113  };
  32.114  
  32.115  #endif // SHARE_VM_RUNTIME_STUBROUTINES_HPP
    33.1 --- a/src/share/vm/utilities/macros.hpp	Thu Mar 03 21:02:56 2011 -0800
    33.2 +++ b/src/share/vm/utilities/macros.hpp	Thu Mar 03 23:31:45 2011 -0800
    33.3 @@ -1,5 +1,5 @@
    33.4  /*
    33.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    33.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    33.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    33.8   *
    33.9   * This code is free software; you can redistribute it and/or modify it
   33.10 @@ -161,6 +161,14 @@
   33.11  #define NOT_WINDOWS(code) code
   33.12  #endif
   33.13  
   33.14 +#ifdef _WIN64
   33.15 +#define WIN64_ONLY(code) code
   33.16 +#define NOT_WIN64(code)
   33.17 +#else
   33.18 +#define WIN64_ONLY(code)
   33.19 +#define NOT_WIN64(code) code
   33.20 +#endif
   33.21 +
   33.22  #if defined(IA32) || defined(AMD64)
   33.23  #define X86
   33.24  #define X86_ONLY(code) code
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/test/compiler/6942326/Test.java	Thu Mar 03 23:31:45 2011 -0800
    34.3 @@ -0,0 +1,409 @@
    34.4 +/*
    34.5 + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
    34.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    34.7 + *
    34.8 + * This code is free software; you can redistribute it and/or modify it
    34.9 + * under the terms of the GNU General Public License version 2 only, as
   34.10 + * published by the Free Software Foundation.
   34.11 + *
   34.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   34.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   34.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   34.15 + * version 2 for more details (a copy is included in the LICENSE file that
   34.16 + * accompanied this code).
   34.17 + *
   34.18 + * You should have received a copy of the GNU General Public License version
   34.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   34.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   34.21 + *
   34.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   34.23 + * or visit www.oracle.com if you need additional information or have any
   34.24 + * questions.
   34.25 + *
   34.26 + */
   34.27 +
   34.28 +/**
   34.29 + * @test
   34.30 + * @bug 6942326
   34.31 + * @summary x86 code in string_indexof() could read beyond reserved heap space
   34.32 + *
   34.33 + * @run main/othervm/timeout=300 -Xmx32m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:CompileCommand=exclude,Test,main -XX:CompileCommand=exclude,Test,test_varsub_indexof -XX:CompileCommand=exclude,Test,test_varstr_indexof -XX:CompileCommand=exclude,Test,test_missub_indexof -XX:CompileCommand=exclude,Test,test_consub_indexof -XX:CompileCommand=exclude,Test,test_conmis_indexof -XX:CompileCommand=exclude,Test,test_subcon Test
   34.34 + *
   34.35 + */
   34.36 +
   34.37 +public class Test {
   34.38 +
   34.39 +    static String[] strings = new String[1024];
   34.40 +    private static final int ITERATIONS = 100000;
   34.41 +
   34.42 +    public static void main(String[] args) {
   34.43 +
   34.44 +        long start_total = System.currentTimeMillis();
   34.45 +
   34.46 +        // search variable size substring in string (33 chars).
   34.47 +        String a = " 1111111111111xx1111111111111xx11y"; // +1 to execute a.substring(1) first
   34.48 +        String b =  "1111111111111xx1111111111111xx11y";
   34.49 +        test_varsub_indexof(a, b);
   34.50 +
   34.51 +        // search variable size substring in string (32 chars).
   34.52 +        a = " 1111111111111xx1111111111111xx1y";
   34.53 +        b =  "1111111111111xx1111111111111xx1y";
   34.54 +        test_varsub_indexof(a, b);
   34.55 +
   34.56 +        // search variable size substring in string (17 chars).
   34.57 +        a = " 1111111111111xx1y";
   34.58 +        b =  "1111111111111xx1y";
   34.59 +        test_varsub_indexof(a, b);
   34.60 +
   34.61 +        // search variable size substring in string (16 chars).
   34.62 +        a = " 111111111111xx1y";
   34.63 +        b =  "111111111111xx1y";
   34.64 +        test_varsub_indexof(a, b);
   34.65 +
   34.66 +        // search variable size substring in string (8 chars).
   34.67 +        a = " 1111xx1y";
   34.68 +        b =  "1111xx1y";
   34.69 +        test_varsub_indexof(a, b);
   34.70 +
   34.71 +        // search variable size substring in string (7 chars).
   34.72 +        a = " 111xx1y";
   34.73 +        b =  "111xx1y";
   34.74 +        test_varsub_indexof(a, b);
   34.75 +
   34.76 +
   34.77 +
   34.78 +        // search substring (17 chars) in variable size string.
   34.79 +        a =                 "1111111111111xx1x";
   34.80 +        b = " 1111111111111xx1111111111111xx1x"; // +1 to execute b.substring(1) first
   34.81 +        test_varstr_indexof(a, b);
   34.82 +
   34.83 +        // search substring (16 chars) in variable size string.
   34.84 +        a =                  "111111111111xx1x";
   34.85 +        b = " 1111111111111xx1111111111111xx1x";
   34.86 +        test_varstr_indexof(a, b);
   34.87 +
   34.88 +        // search substring (9 chars) in variable size string.
   34.89 +        a =                         "11111xx1x";
   34.90 +        b = " 1111111111111xx1111111111111xx1x";
   34.91 +        test_varstr_indexof(a, b);
   34.92 +
   34.93 +        // search substring (8 chars) in variable size string.
   34.94 +        a =                          "1111xx1x";
   34.95 +        b = " 1111111111111xx1111111111111xx1x";
   34.96 +        test_varstr_indexof(a, b);
   34.97 +
   34.98 +        // search substring (4 chars) in variable size string.
   34.99 +        a =                              "xx1x";
  34.100 +        b = " 1111111111111xx1111111111111xx1x";
  34.101 +        test_varstr_indexof(a, b);
  34.102 +
  34.103 +        // search substring (3 chars) in variable size string.
  34.104 +        a =                               "x1x";
  34.105 +        b = " 1111111111111xx1111111111111xx1x";
  34.106 +        test_varstr_indexof(a, b);
  34.107 +
  34.108 +        // search substring (2 chars) in variable size string.
  34.109 +        a =                                "1y";
  34.110 +        b = " 1111111111111xx1111111111111xx1y";
  34.111 +        test_varstr_indexof(a, b);
  34.112 +
  34.113 +
  34.114 +
  34.115 +        // search non matching variable size substring in string (33 chars).
  34.116 +        a = " 1111111111111xx1111111111111xx11z"; // +1 to execute a.substring(1) first
  34.117 +        b =  "1111111111111xx1111111111111xx11y";
  34.118 +        test_missub_indexof(a, b);
  34.119 +
  34.120 +        // search non matching variable size substring in string (32 chars).
  34.121 +        a = " 1111111111111xx1111111111111xx1z";
  34.122 +        b =  "1111111111111xx1111111111111xx1y";
  34.123 +        test_missub_indexof(a, b);
  34.124 +
  34.125 +        // search non matching variable size substring in string (17 chars).
  34.126 +        a = " 1111111111111xx1z";
  34.127 +        b =  "1111111111111xx1y";
  34.128 +        test_missub_indexof(a, b);
  34.129 +
  34.130 +        // search non matching variable size substring in string (16 chars).
  34.131 +        a = " 111111111111xx1z";
  34.132 +        b =  "111111111111xx1y";
  34.133 +        test_missub_indexof(a, b);
  34.134 +
  34.135 +        // search non matching variable size substring in string (8 chars).
  34.136 +        a = " 1111xx1z";
  34.137 +        b =  "1111xx1y";
  34.138 +        test_missub_indexof(a, b);
  34.139 +
  34.140 +        // search non matching variable size substring in string (7 chars).
  34.141 +        a = " 111xx1z";
  34.142 +        b =  "111xx1y";
  34.143 +        test_missub_indexof(a, b);
  34.144 +
  34.145 +
  34.146 +
  34.147 +        // Testing constant substring search in variable size string.
  34.148 +
  34.149 +        // search constant substring (17 chars).
  34.150 +        b = " 1111111111111xx1111111111111xx1x"; // +1 to execute b.substring(1) first
  34.151 +        TestCon tc = new TestCon17();
  34.152 +        test_consub_indexof(tc, b);
  34.153 +
  34.154 +        // search constant substring (16 chars).
  34.155 +        b = " 1111111111111xx1111111111111xx1x";
  34.156 +        tc = new TestCon16();
  34.157 +        test_consub_indexof(tc, b);
  34.158 +
  34.159 +        // search constant substring (9 chars).
  34.160 +        b = " 1111111111111xx1111111111111xx1x";
  34.161 +        tc = new TestCon9();
  34.162 +        test_consub_indexof(tc, b);
  34.163 +
  34.164 +        // search constant substring (8 chars).
  34.165 +        b = " 1111111111111xx1111111111111xx1x";
  34.166 +        tc = new TestCon8();
  34.167 +        test_consub_indexof(tc, b);
  34.168 +
  34.169 +        // search constant substring (4 chars).
  34.170 +        b = " 1111111111111xx1111111111111xx1x";
  34.171 +        tc = new TestCon4();
  34.172 +        test_consub_indexof(tc, b);
  34.173 +
  34.174 +        // search constant substring (3 chars).
  34.175 +        b = " 1111111111111xx1111111111111xx1x";
  34.176 +        tc = new TestCon3();
  34.177 +        test_consub_indexof(tc, b);
  34.178 +
  34.179 +        // search constant substring (2 chars).
  34.180 +        b = " 1111111111111xx1111111111111xx1y";
  34.181 +        tc = new TestCon2();
  34.182 +        test_consub_indexof(tc, b);
  34.183 +
  34.184 +        // search constant substring (1 chars).
  34.185 +        b = " 1111111111111xx1111111111111xx1y";
  34.186 +        tc = new TestCon1();
  34.187 +        test_consub_indexof(tc, b);
  34.188 +
  34.189 +
  34.190 +        // search non matching constant substring (17 chars).
  34.191 +        b = " 1111111111111xx1111111111111xx1z"; // +1 to execute b.substring(1) first
  34.192 +        tc = new TestCon17();
  34.193 +        test_conmis_indexof(tc, b);
  34.194 +
  34.195 +        // search non matching constant substring (16 chars).
  34.196 +        b = " 1111111111111xx1111111111111xx1z";
  34.197 +        tc = new TestCon16();
  34.198 +        test_conmis_indexof(tc, b);
  34.199 +
  34.200 +        // search non matching constant substring (9 chars).
  34.201 +        b = " 1111111111111xx1111111111111xx1z";
  34.202 +        tc = new TestCon9();
  34.203 +        test_conmis_indexof(tc, b);
  34.204 +
  34.205 +        // search non matching constant substring (8 chars).
  34.206 +        b = " 1111111111111xx1111111111111xx1z";
  34.207 +        tc = new TestCon8();
  34.208 +        test_conmis_indexof(tc, b);
  34.209 +
  34.210 +        // search non matching constant substring (4 chars).
  34.211 +        b = " 1111111111111xx1111111111111xx1z";
  34.212 +        tc = new TestCon4();
  34.213 +        test_conmis_indexof(tc, b);
  34.214 +
  34.215 +        // search non matching constant substring (3 chars).
  34.216 +        b = " 1111111111111xx1111111111111xx1z";
  34.217 +        tc = new TestCon3();
  34.218 +        test_conmis_indexof(tc, b);
  34.219 +
  34.220 +        // search non matching constant substring (2 chars).
  34.221 +        b = " 1111111111111xx1111111111111xx1z";
  34.222 +        tc = new TestCon2();
  34.223 +        test_conmis_indexof(tc, b);
  34.224 +
  34.225 +        // search non matching constant substring (1 chars).
  34.226 +        b = " 1111111111111xx1111111111111xx1z";
  34.227 +        tc = new TestCon1();
  34.228 +        test_conmis_indexof(tc, b);
  34.229 +
  34.230 +        long end_total = System.currentTimeMillis();
  34.231 +        System.out.println("End run time: " + (end_total - start_total));
  34.232 +
  34.233 +    }
  34.234 +
  34.235 +    public static long test_init(String a, String b) {
  34.236 +        for (int i = 0; i < 512; i++) {
  34.237 +            strings[i * 2] = new String(b.toCharArray());
  34.238 +            strings[i * 2 + 1] = new String(a.toCharArray());
  34.239 +        }
  34.240 +        System.out.print(a.length() + " " + b.length() + " ");
  34.241 +        return System.currentTimeMillis();
  34.242 +    }
  34.243 +
  34.244 +    public static void test_end(String a, String b, int v, int expected, long start) {
  34.245 +        long end = System.currentTimeMillis();
  34.246 +        int res = (v/ITERATIONS);
  34.247 +        System.out.print(" " + res);
  34.248 +        System.out.println(" time:" + (end - start));
  34.249 +        if (res != expected) {
  34.250 +            System.out.println("wrong indexOf result: " + res + ", expected " + expected);
  34.251 +            System.out.println("\"" + b + "\".indexOf(\"" + a + "\")");
  34.252 +            System.exit(97);
  34.253 +        }
  34.254 +    }
  34.255 +
  34.256 +    public static int test_subvar() {
  34.257 +        int s = 0;
  34.258 +        int v = 0;
  34.259 +        for (int i = 0; i < ITERATIONS; i++) {
  34.260 +            v += strings[s].indexOf(strings[s + 1]);
  34.261 +            s += 2;
  34.262 +            if (s >= strings.length) s = 0;
  34.263 +        }
  34.264 +        return v;
  34.265 +    }
  34.266 +
  34.267 +    public static void test_varsub_indexof(String a, String b) {
  34.268 +        System.out.println("Start search variable size substring in string (" + b.length() + " chars)");
  34.269 +        long start_it = System.currentTimeMillis();
  34.270 +        int limit = 1; // last a.length() == 1
  34.271 +        while (a.length() > limit) {
  34.272 +            a = a.substring(1);
  34.273 +            long start = test_init(a, b);
  34.274 +            int v = test_subvar();
  34.275 +            test_end(a, b, v, (b.length() - a.length()), start);
  34.276 +        }
  34.277 +        long end_it = System.currentTimeMillis();
  34.278 +        System.out.println("End search variable size substring in string (" + b.length() + " chars), time: " + (end_it - start_it));
  34.279 +    }
  34.280 +
  34.281 +    public static void test_varstr_indexof(String a, String b) {
  34.282 +        System.out.println("Start search substring (" + a.length() + " chars) in variable size string");
  34.283 +        long start_it = System.currentTimeMillis();
  34.284 +        int limit = a.length();
  34.285 +        while (b.length() > limit) {
  34.286 +            b = b.substring(1);
  34.287 +            long start = test_init(a, b);
  34.288 +            int v = test_subvar();
  34.289 +            test_end(a, b, v, (b.length() - a.length()), start);
  34.290 +        }
  34.291 +        long end_it = System.currentTimeMillis();
  34.292 +        System.out.println("End search substring (" + a.length() + " chars) in variable size string, time: " + (end_it - start_it));
  34.293 +    }
  34.294 +
  34.295 +    public static void test_missub_indexof(String a, String b) {
  34.296 +        System.out.println("Start search non matching variable size substring in string (" + b.length() + " chars)");
  34.297 +        long start_it = System.currentTimeMillis();
  34.298 +        int limit = 1; // last a.length() == 1
  34.299 +        while (a.length() > limit) {
  34.300 +            a = a.substring(1);
  34.301 +            long start = test_init(a, b);
  34.302 +            int v = test_subvar();
  34.303 +            test_end(a, b, v, (-1), start);
  34.304 +        }
  34.305 +        long end_it = System.currentTimeMillis();
  34.306 +        System.out.println("End search non matching variable size substring in string (" + b.length() + " chars), time: " + (end_it - start_it));
  34.307 +    }
  34.308 +
  34.309 +
  34.310 +
  34.311 +    public static void test_consub_indexof(TestCon tc, String b) {
  34.312 +        System.out.println("Start search constant substring (" + tc.constr().length() + " chars)");
  34.313 +        long start_it = System.currentTimeMillis();
  34.314 +        int limit = tc.constr().length();
  34.315 +        while (b.length() > limit) {
  34.316 +            b = b.substring(1);
  34.317 +            long start = test_init(tc.constr(), b);
  34.318 +            int v = test_subcon(tc);
  34.319 +            test_end(tc.constr(), b, v, (b.length() - tc.constr().length()), start);
  34.320 +        }
  34.321 +        long end_it = System.currentTimeMillis();
  34.322 +        System.out.println("End search constant substring (" + tc.constr().length() + " chars), time: " + (end_it - start_it));
  34.323 +    }
  34.324 +
  34.325 +    public static void test_conmis_indexof(TestCon tc, String b) {
  34.326 +        System.out.println("Start search non matching constant substring (" + tc.constr().length() + " chars)");
  34.327 +        long start_it = System.currentTimeMillis();
  34.328 +        int limit = tc.constr().length();
  34.329 +        while (b.length() > limit) {
  34.330 +            b = b.substring(1);
  34.331 +            long start = test_init(tc.constr(), b);
  34.332 +            int v = test_subcon(tc);
  34.333 +            test_end(tc.constr(), b, v, (-1), start);
  34.334 +        }
  34.335 +        long end_it = System.currentTimeMillis();
  34.336 +        System.out.println("End search non matching constant substring (" + tc.constr().length() + " chars), time: " + (end_it - start_it));
  34.337 +    }
  34.338 +
  34.339 +    public static int test_subcon(TestCon tc) {
  34.340 +        int s = 0;
  34.341 +        int v = 0;
  34.342 +        for (int i = 0; i < ITERATIONS; i++) {
  34.343 +            v += tc.indexOf(strings[s]);
  34.344 +            s += 2;
  34.345 +            if (s >= strings.length) s = 0;
  34.346 +        }
  34.347 +        return v;
  34.348 +    }
  34.349 +
  34.350 +    private interface TestCon {
  34.351 +        public String constr();
  34.352 +        public int indexOf(String str);
  34.353 +    }
  34.354 +
  34.355 +    // search constant substring (17 chars).
  34.356 +    private final static class TestCon17 implements TestCon {
  34.357 +        private static final String constr = "1111111111111xx1x";
  34.358 +        public String constr() { return constr; }
  34.359 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.360 +    }
  34.361 +
  34.362 +    // search constant substring (16 chars).
  34.363 +    private final static class TestCon16 implements TestCon {
  34.364 +        private static final String constr = "111111111111xx1x";
  34.365 +        public String constr() { return constr; }
  34.366 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.367 +    }
  34.368 +
  34.369 +    // search constant substring (9 chars).
  34.370 +    private final static class TestCon9 implements TestCon {
  34.371 +        private static final String constr = "11111xx1x";
  34.372 +        public String constr() { return constr; }
  34.373 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.374 +    }
  34.375 +
  34.376 +    // search constant substring (8 chars).
  34.377 +    private final static class TestCon8 implements TestCon {
  34.378 +        private static final String constr = "1111xx1x";
  34.379 +        public String constr() { return constr; }
  34.380 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.381 +    }
  34.382 +
  34.383 +    // search constant substring (4 chars).
  34.384 +    private final static class TestCon4 implements TestCon {
  34.385 +        private static final String constr = "xx1x";
  34.386 +        public String constr() { return constr; }
  34.387 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.388 +    }
  34.389 +
  34.390 +    // search constant substring (3 chars).
  34.391 +    private final static class TestCon3 implements TestCon {
  34.392 +        private static final String constr = "x1x";
  34.393 +        public String constr() { return constr; }
  34.394 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.395 +    }
  34.396 +
  34.397 +    // search constant substring (2 chars).
  34.398 +    private final static class TestCon2 implements TestCon {
  34.399 +        private static final String constr = "1y";
  34.400 +        public String constr() { return constr; }
  34.401 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.402 +    }
  34.403 +
  34.404 +
  34.405 +    // search constant substring (1 chars).
  34.406 +    private final static class TestCon1 implements TestCon {
  34.407 +        private static final String constr = "y";
  34.408 +        public String constr() { return constr; }
  34.409 +        public int indexOf(String str) { return str.indexOf(constr); }
  34.410 +    }
  34.411 +}
  34.412 +

mercurial