Sync in generate_conjoint_long_oop_copy and refine it.

Mon, 26 Sep 2016 14:21:21 +0800

author
fujie
date
Mon, 26 Sep 2016 14:21:21 +0800
changeset 113
9c8ae03d78f5
parent 112
a55519e3b29f
child 114
eb8a5893f3c4

Sync in generate_conjoint_long_oop_copy and refine it.

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Mon Sep 26 14:01:22 2016 +0800
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Mon Sep 26 14:21:21 2016 +0800
     1.3 @@ -1412,7 +1412,7 @@
     1.4    // cache line boundaries will still be loaded and stored atomically.
     1.5    //
     1.6    address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
     1.7 -		Label l_2, l_3, l_4, l_stchk;
     1.8 +		Label l_2, l_4;
     1.9  		StubCodeMark mark(this, "StubRoutines", name);
    1.10  		__ align(CodeEntryAlignment);
    1.11  		address start = __ pc();
    1.12 @@ -1439,67 +1439,37 @@
    1.13  		__ move(T3, A0); 
    1.14  		__ move(T0, A1);
    1.15  
    1.16 -		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
    1.17 +		if (is_oop) {
    1.18 +			__ sync();
    1.19 +		}
    1.20 +
    1.21  		__ sll(AT, T1, Address::times_8); 
    1.22  		__ add(AT, T3, AT); 
    1.23  		__ lea(T3 , Address(AT, -8)); 
    1.24 -		//__ std();
    1.25 -		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
    1.26  		__ sll(AT, T1, Address::times_8); 
    1.27  		__ add(AT, T0, AT); 
    1.28  		__ lea(T0 , Address(AT, -8)); 
    1.29  
    1.30 -		//    __ cmpl(ecx, 32);
    1.31 -		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
    1.32 -		//  __ testl(ecx, ecx);
    1.33 -		//__ jcc(Assembler::zero, l_4);
    1.34  		__ beq(T1, R0, l_4); 
    1.35  		__ delayed()->nop();  
    1.36 -		// __ subl(edi, esi);
    1.37 +
    1.38  		__ align(16);
    1.39  		__ bind(l_2);
    1.40 -		// __ movl(edx, Address(esi));
    1.41  		__ ld(AT, T3, 0);   
    1.42 -		// __ movl(Address(esi, edi, Address::times_1), edx);
    1.43  		__ sd(AT, T0, 0); 
    1.44 -		// __ subl(esi, 4);
    1.45  		__ addi(T3, T3, -8); 
    1.46  		__ addi(T0, T0, -8); 
    1.47 -		//   __ decl(ecx);
    1.48  		__ addi(T1, T1, -1); 
    1.49 -		//__ jcc(Assembler::notEqual, l_2);
    1.50  		__ bne(T1, R0, l_2);  
    1.51  		__ delayed()->nop(); 
    1.52 +
    1.53  		if (is_oop) {
    1.54 -			// __ jmp(l_stchk);
    1.55 -			__ b( l_stchk); 
    1.56 -			__ delayed()->nop(); 
    1.57 +			__ move(T0, A1);  
    1.58 +			__ move(T1, A2);  
    1.59 +			array_store_check();
    1.60 +                        __ sync();
    1.61  		}
    1.62  		__ bind(l_4);
    1.63 -		//      __ cld();
    1.64 -		//     __ popl(edi);
    1.65 -		//    __ popl(esi);
    1.66 -		//   __ ret(0);
    1.67 -		__ pop(T8); 
    1.68 -		__ pop(T1); 
    1.69 -		__ pop(T0); 
    1.70 -		__ pop(T3); 
    1.71 -		__ jr(RA); 
    1.72 -		__ delayed()->nop(); 
    1.73 -		__ bind(l_3);
    1.74 -		//   __ rep_movl();
    1.75 -		if (is_oop) {
    1.76 -			__ bind(l_stchk);
    1.77 -			//  __ movl(edi, Address(esp, 8+ 8));
    1.78 -			__ move(T0, A1);  
    1.79 -			// __ movl(ecx, Address(esp, 8+ 12));
    1.80 -			__ move(T1, A2);  
    1.81 -			array_store_check();
    1.82 -		}
    1.83 -		//    __ cld();
    1.84 -		//   __ popl(edi);
    1.85 -		//   __ popl(esi);
    1.86 -		//  __ ret(0);
    1.87  		__ pop(T8);	
    1.88  		__ pop(T1);	
    1.89  		__ pop(T0);	
    1.90 @@ -1508,759 +1478,6 @@
    1.91  		__ delayed()->nop(); 
    1.92  		return start;
    1.93    }
    1.94 -#if 0
    1.95 -  // Arguments:
    1.96 -  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
    1.97 -  //             ignored
    1.98 -  //   is_oop  - true => oop array, so generate store check code
    1.99 -  //   name    - stub name string
   1.100 -  //
   1.101 -  // Inputs:
   1.102 -  //   c_rarg0   - source array address
   1.103 -  //   c_rarg1   - destination array address
   1.104 -  //   c_rarg2   - element count, treated as ssize_t, can be zero
   1.105 -  //
   1.106 -  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
   1.107 -    __ align(CodeEntryAlignment);
   1.108 -    StubCodeMark mark(this, "StubRoutines", name);
   1.109 -    address start = __ pc();
   1.110 -
   1.111 -    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
   1.112 -    const Register from        = rdi;  // source array address
   1.113 -    const Register to          = rsi;  // destination array address
   1.114 -    const Register qword_count = rdx;  // elements count
   1.115 -    const Register saved_count = rcx;
   1.116 -
   1.117 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   1.118 -    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
   1.119 -
   1.120 -    address disjoint_copy_entry = NULL;
   1.121 -    if (is_oop) {
   1.122 -      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
   1.123 -      disjoint_copy_entry = disjoint_oop_copy_entry;
   1.124 -      oop_copy_entry  = __ pc();
   1.125 -      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
   1.126 -    } else {
   1.127 -      disjoint_copy_entry = disjoint_long_copy_entry;
   1.128 -      long_copy_entry = __ pc();
   1.129 -      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
   1.130 -    }
   1.131 -    BLOCK_COMMENT("Entry:");
   1.132 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   1.133 -
   1.134 -    array_overlap_test(disjoint_copy_entry, Address::times_8);
   1.135 -    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
   1.136 -                      // r9 and r10 may be used to save non-volatile registers
   1.137 -
   1.138 -    // 'from', 'to' and 'qword_count' are now valid
   1.139 -
   1.140 -    if (is_oop) {
   1.141 -      // Save to and count for store barrier
   1.142 -      __ movptr(saved_count, qword_count);
   1.143 -      // No registers are destroyed by this call
   1.144 -      gen_write_ref_array_pre_barrier(to, saved_count);
   1.145 -    }
   1.146 -
   1.147 -    __ jmp(L_copy_32_bytes);
   1.148 -
   1.149 -    // Copy trailing qwords
   1.150 -  __ BIND(L_copy_8_bytes);
   1.151 -    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
   1.152 -    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
   1.153 -    __ decrement(qword_count);
   1.154 -    __ jcc(Assembler::notZero, L_copy_8_bytes);
   1.155 -
   1.156 -    if (is_oop) {
   1.157 -      __ jmp(L_exit);
   1.158 -    } else {
   1.159 -      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
   1.160 -      restore_arg_regs();
   1.161 -      __ xorptr(rax, rax); // return 0
   1.162 -      __ leave(); // required for proper stackwalking of RuntimeStub frame
   1.163 -      __ ret(0);
   1.164 -    }
   1.165 -
   1.166 -    // Copy in 32-bytes chunks
   1.167 -    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
   1.168 -
   1.169 -    if (is_oop) {
   1.170 -    __ BIND(L_exit);
   1.171 -      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
   1.172 -      gen_write_ref_array_post_barrier(to, rcx, rax);
   1.173 -      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
   1.174 -    } else {
   1.175 -      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
   1.176 -    }
   1.177 -    restore_arg_regs();
   1.178 -    __ xorptr(rax, rax); // return 0
   1.179 -    __ leave(); // required for proper stackwalking of RuntimeStub frame
   1.180 -    __ ret(0);
   1.181 -
   1.182 -    return start;
   1.183 -  }
   1.184 -
   1.185 -
   1.186 -  // Helper for generating a dynamic type check.
   1.187 -  // Smashes no registers.
   1.188 -  void generate_type_check(Register sub_klass,
   1.189 -                           Register super_check_offset,
   1.190 -                           Register super_klass,
   1.191 -                           Label& L_success) {
   1.192 -    assert_different_registers(sub_klass, super_check_offset, super_klass);
   1.193 -
   1.194 -    BLOCK_COMMENT("type_check:");
   1.195 -
   1.196 -    Label L_miss;
   1.197 -
   1.198 -    // a couple of useful fields in sub_klass:
   1.199 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
   1.200 -                     Klass::secondary_supers_offset_in_bytes());
   1.201 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   1.202 -                     Klass::secondary_super_cache_offset_in_bytes());
   1.203 -    Address secondary_supers_addr(sub_klass, ss_offset);
   1.204 -    Address super_cache_addr(     sub_klass, sc_offset);
   1.205 -
   1.206 -    // if the pointers are equal, we are done (e.g., String[] elements)
   1.207 -    __ cmpptr(super_klass, sub_klass);
   1.208 -    __ jcc(Assembler::equal, L_success);
   1.209 -
   1.210 -    // check the supertype display:
   1.211 -    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
   1.212 -    __ cmpptr(super_klass, super_check_addr); // test the super type
   1.213 -    __ jcc(Assembler::equal, L_success);
   1.214 -
   1.215 -    // if it was a primary super, we can just fail immediately
   1.216 -    __ cmpl(super_check_offset, sc_offset);
   1.217 -    __ jcc(Assembler::notEqual, L_miss);
   1.218 -
   1.219 -    // Now do a linear scan of the secondary super-klass chain.
   1.220 -    // The repne_scan instruction uses fixed registers, which we must spill.
   1.221 -    // (We need a couple more temps in any case.)
   1.222 -    // This code is rarely used, so simplicity is a virtue here.
   1.223 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   1.224 -    {
   1.225 -      __ push(rax);
   1.226 -      __ push(rcx);
   1.227 -      __ push(rdi);
   1.228 -      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
   1.229 -
   1.230 -      __ movptr(rdi, secondary_supers_addr);
   1.231 -      // Load the array length.
   1.232 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   1.233 -      // Skip to start of data.
   1.234 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
    1.235 -      // Scan rcx words at [rdi] for occurrence of rax
   1.236 -      // Set NZ/Z based on last compare
   1.237 -      __ movptr(rax, super_klass);
   1.238 -      if (UseCompressedOops) {
   1.239 -        // Compare against compressed form.  Don't need to uncompress because
   1.240 -        // looks like orig rax is restored in popq below.
   1.241 -        __ encode_heap_oop(rax);
   1.242 -        __ repne_scanl();
   1.243 -      } else {
   1.244 -        __ repne_scan();
   1.245 -      }
   1.246 -
   1.247 -      // Unspill the temp. registers:
   1.248 -      __ pop(rdi);
   1.249 -      __ pop(rcx);
   1.250 -      __ pop(rax);
   1.251 -
   1.252 -      __ jcc(Assembler::notEqual, L_miss);
   1.253 -    }
   1.254 -
   1.255 -    // Success.  Cache the super we found and proceed in triumph.
   1.256 -    __ movptr(super_cache_addr, super_klass); // note: rax is dead
   1.257 -    __ jmp(L_success);
   1.258 -
   1.259 -    // Fall through on failure!
   1.260 -    __ BIND(L_miss);
   1.261 -  }
   1.262 -
   1.263 -  //
   1.264 -  //  Generate checkcasting array copy stub
   1.265 -  //
   1.266 -  //  Input:
   1.267 -  //    c_rarg0   - source array address
   1.268 -  //    c_rarg1   - destination array address
   1.269 -  //    c_rarg2   - element count, treated as ssize_t, can be zero
   1.270 -  //    c_rarg3   - size_t ckoff (super_check_offset)
   1.271 -  // not Win64
   1.272 -  //    c_rarg4   - oop ckval (super_klass)
   1.273 -  // Win64
   1.274 -  //    rsp+40    - oop ckval (super_klass)
   1.275 -  //
   1.276 -  //  Output:
   1.277 -  //    rax ==  0  -  success
   1.278 -  //    rax == -1^K - failure, where K is partial transfer count
   1.279 -  //
   1.280 -  address generate_checkcast_copy(const char *name) {
   1.281 -
   1.282 -    Label L_load_element, L_store_element, L_do_card_marks, L_done;
   1.283 -
   1.284 -    // Input registers (after setup_arg_regs)
   1.285 -    const Register from        = rdi;   // source array address
   1.286 -    const Register to          = rsi;   // destination array address
   1.287 -    const Register length      = rdx;   // elements count
   1.288 -    const Register ckoff       = rcx;   // super_check_offset
   1.289 -    const Register ckval       = r8;    // super_klass
   1.290 -
   1.291 -    // Registers used as temps (r13, r14 are save-on-entry)
   1.292 -    const Register end_from    = from;  // source array end address
   1.293 -    const Register end_to      = r13;   // destination array end address
   1.294 -    const Register count       = rdx;   // -(count_remaining)
   1.295 -    const Register r14_length  = r14;   // saved copy of length
   1.296 -    // End pointers are inclusive, and if length is not zero they point
   1.297 -    // to the last unit copied:  end_to[0] := end_from[0]
   1.298 -
   1.299 -    const Register rax_oop    = rax;    // actual oop copied
   1.300 -    const Register r11_klass  = r11;    // oop._klass
   1.301 -
   1.302 -    //---------------------------------------------------------------
   1.303 -    // Assembler stub will be used for this call to arraycopy
   1.304 -    // if the two arrays are subtypes of Object[] but the
   1.305 -    // destination array type is not equal to or a supertype
   1.306 -    // of the source type.  Each element must be separately
   1.307 -    // checked.
   1.308 -
   1.309 -    __ align(CodeEntryAlignment);
   1.310 -    StubCodeMark mark(this, "StubRoutines", name);
   1.311 -    address start = __ pc();
   1.312 -
   1.313 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   1.314 -
   1.315 -    checkcast_copy_entry  = __ pc();
   1.316 -    BLOCK_COMMENT("Entry:");
   1.317 -
   1.318 -#ifdef ASSERT
   1.319 -    // caller guarantees that the arrays really are different
   1.320 -    // otherwise, we would have to make conjoint checks
   1.321 -    { Label L;
   1.322 -      array_overlap_test(L, TIMES_OOP);
   1.323 -      __ stop("checkcast_copy within a single array");
   1.324 -      __ bind(L);
   1.325 -    }
   1.326 -#endif //ASSERT
   1.327 -
   1.328 -    // allocate spill slots for r13, r14
   1.329 -    enum {
   1.330 -      saved_r13_offset,
   1.331 -      saved_r14_offset,
   1.332 -      saved_rbp_offset,
   1.333 -      saved_rip_offset,
   1.334 -      saved_rarg0_offset
   1.335 -    };
   1.336 -    __ subptr(rsp, saved_rbp_offset * wordSize);
   1.337 -    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
   1.338 -    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
   1.339 -    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
   1.340 -                       // ckoff => rcx, ckval => r8
   1.341 -                       // r9 and r10 may be used to save non-volatile registers
   1.342 -#ifdef _WIN64
   1.343 -    // last argument (#4) is on stack on Win64
   1.344 -    const int ckval_offset = saved_rarg0_offset + 4;
   1.345 -    __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
   1.346 -#endif
   1.347 -
   1.348 -    // check that int operands are properly extended to size_t
   1.349 -    assert_clean_int(length, rax);
   1.350 -    assert_clean_int(ckoff, rax);
   1.351 -
   1.352 -#ifdef ASSERT
   1.353 -    BLOCK_COMMENT("assert consistent ckoff/ckval");
   1.354 -    // The ckoff and ckval must be mutually consistent,
   1.355 -    // even though caller generates both.
   1.356 -    { Label L;
   1.357 -      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
   1.358 -                        Klass::super_check_offset_offset_in_bytes());
   1.359 -      __ cmpl(ckoff, Address(ckval, sco_offset));
   1.360 -      __ jcc(Assembler::equal, L);
   1.361 -      __ stop("super_check_offset inconsistent");
   1.362 -      __ bind(L);
   1.363 -    }
   1.364 -#endif //ASSERT
   1.365 -
   1.366 -    // Loop-invariant addresses.  They are exclusive end pointers.
   1.367 -    Address end_from_addr(from, length, TIMES_OOP, 0);
   1.368 -    Address   end_to_addr(to,   length, TIMES_OOP, 0);
   1.369 -    // Loop-variant addresses.  They assume post-incremented count < 0.
   1.370 -    Address from_element_addr(end_from, count, TIMES_OOP, 0);
   1.371 -    Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
   1.372 -
   1.373 -    gen_write_ref_array_pre_barrier(to, count);
   1.374 -
   1.375 -    // Copy from low to high addresses, indexed from the end of each array.
   1.376 -    __ lea(end_from, end_from_addr);
   1.377 -    __ lea(end_to,   end_to_addr);
   1.378 -    __ movptr(r14_length, length);        // save a copy of the length
   1.379 -    assert(length == count, "");          // else fix next line:
   1.380 -    __ negptr(count);                     // negate and test the length
   1.381 -    __ jcc(Assembler::notZero, L_load_element);
   1.382 -
   1.383 -    // Empty array:  Nothing to do.
   1.384 -    __ xorptr(rax, rax);                  // return 0 on (trivial) success
   1.385 -    __ jmp(L_done);
   1.386 -
   1.387 -    // ======== begin loop ========
   1.388 -    // (Loop is rotated; its entry is L_load_element.)
   1.389 -    // Loop control:
   1.390 -    //   for (count = -count; count != 0; count++)
    1.391 -    // Base pointers src, dst are biased by 8*(count-1), to last element.
   1.392 -    __ align(16);
   1.393 -
   1.394 -    __ BIND(L_store_element);
   1.395 -    __ store_heap_oop(rax_oop, to_element_addr);  // store the oop
   1.396 -    __ sync();
   1.397 -    __ increment(count);               // increment the count toward zero
   1.398 -    __ jcc(Assembler::zero, L_do_card_marks);
   1.399 -
   1.400 -    // ======== loop entry is here ========
   1.401 -    __ BIND(L_load_element);
   1.402 -    __ load_heap_oop(rax_oop, from_element_addr); // load the oop
   1.403 -    __ testptr(rax_oop, rax_oop);
   1.404 -    __ jcc(Assembler::zero, L_store_element);
   1.405 -
   1.406 -    __ load_klass(r11_klass, rax_oop);// query the object klass
   1.407 -    generate_type_check(r11_klass, ckoff, ckval, L_store_element);
   1.408 -    // ======== end loop ========
   1.409 -
   1.410 -    // It was a real error; we must depend on the caller to finish the job.
   1.411 -    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
   1.412 -    // Emit GC store barriers for the oops we have copied (r14 + rdx),
   1.413 -    // and report their number to the caller.
   1.414 -    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
   1.415 -    __ lea(end_to, to_element_addr);
   1.416 -    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
   1.417 -    __ movptr(rax, r14_length);           // original oops
   1.418 -    __ addptr(rax, count);                // K = (original - remaining) oops
   1.419 -    __ notptr(rax);                       // report (-1^K) to caller
   1.420 -    __ jmp(L_done);
   1.421 -
   1.422 -    // Come here on success only.
   1.423 -    __ BIND(L_do_card_marks);
   1.424 -    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
   1.425 -    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
   1.426 -    __ xorptr(rax, rax);                  // return 0 on success
   1.427 -
   1.428 -    // Common exit point (success or failure).
   1.429 -    __ BIND(L_done);
   1.430 -    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
   1.431 -    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
   1.432 -    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
   1.433 -    restore_arg_regs();
   1.434 -    __ leave(); // required for proper stackwalking of RuntimeStub frame
   1.435 -    __ ret(0);
   1.436 -
   1.437 -    return start;
   1.438 -  }
   1.439 -
   1.440 -  //
   1.441 -  //  Generate 'unsafe' array copy stub
   1.442 -  //  Though just as safe as the other stubs, it takes an unscaled
   1.443 -  //  size_t argument instead of an element count.
   1.444 -  //
   1.445 -  //  Input:
   1.446 -  //    c_rarg0   - source array address
   1.447 -  //    c_rarg1   - destination array address
   1.448 -  //    c_rarg2   - byte count, treated as ssize_t, can be zero
   1.449 -  //
   1.450 -  // Examines the alignment of the operands and dispatches
   1.451 -  // to a long, int, short, or byte copy loop.
   1.452 -  //
   1.453 -  address generate_unsafe_copy(const char *name) {
   1.454 -
   1.455 -    Label L_long_aligned, L_int_aligned, L_short_aligned;
   1.456 -
   1.457 -    // Input registers (before setup_arg_regs)
   1.458 -    const Register from        = c_rarg0;  // source array address
   1.459 -    const Register to          = c_rarg1;  // destination array address
   1.460 -    const Register size        = c_rarg2;  // byte count (size_t)
   1.461 -
   1.462 -    // Register used as a temp
   1.463 -    const Register bits        = rax;      // test copy of low bits
   1.464 -
   1.465 -    __ align(CodeEntryAlignment);
   1.466 -    StubCodeMark mark(this, "StubRoutines", name);
   1.467 -    address start = __ pc();
   1.468 -
   1.469 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   1.470 -
   1.471 -    // bump this on entry, not on exit:
   1.472 -    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
   1.473 -
   1.474 -    __ mov(bits, from);
   1.475 -    __ orptr(bits, to);
   1.476 -    __ orptr(bits, size);
   1.477 -
   1.478 -    __ testb(bits, BytesPerLong-1);
   1.479 -    __ jccb(Assembler::zero, L_long_aligned);
   1.480 -
   1.481 -    __ testb(bits, BytesPerInt-1);
   1.482 -    __ jccb(Assembler::zero, L_int_aligned);
   1.483 -
   1.484 -    __ testb(bits, BytesPerShort-1);
   1.485 -    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
   1.486 -
   1.487 -    __ BIND(L_short_aligned);
   1.488 -    __ shrptr(size, LogBytesPerShort); // size => short_count
   1.489 -    __ jump(RuntimeAddress(short_copy_entry));
   1.490 -
   1.491 -    __ BIND(L_int_aligned);
   1.492 -    __ shrptr(size, LogBytesPerInt); // size => int_count
   1.493 -    __ jump(RuntimeAddress(int_copy_entry));
   1.494 -
   1.495 -    __ BIND(L_long_aligned);
   1.496 -    __ shrptr(size, LogBytesPerLong); // size => qword_count
   1.497 -    __ jump(RuntimeAddress(long_copy_entry));
   1.498 -
   1.499 -    return start;
   1.500 -  }
   1.501 -
   1.502 -  // Perform range checks on the proposed arraycopy.
   1.503 -  // Kills temp, but nothing else.
   1.504 -  // Also, clean the sign bits of src_pos and dst_pos.
   1.505 -  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
   1.506 -                              Register src_pos, // source position (c_rarg1)
    1.507 -                              Register dst,     // destination array oop (c_rarg2)
   1.508 -                              Register dst_pos, // destination position (c_rarg3)
   1.509 -                              Register length,
   1.510 -                              Register temp,
   1.511 -                              Label& L_failed) {
   1.512 -    BLOCK_COMMENT("arraycopy_range_checks:");
   1.513 -
   1.514 -    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
   1.515 -    __ movl(temp, length);
   1.516 -    __ addl(temp, src_pos);             // src_pos + length
   1.517 -    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
   1.518 -    __ jcc(Assembler::above, L_failed);
   1.519 -
   1.520 -    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
   1.521 -    __ movl(temp, length);
   1.522 -    __ addl(temp, dst_pos);             // dst_pos + length
   1.523 -    __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
   1.524 -    __ jcc(Assembler::above, L_failed);
   1.525 -
   1.526 -    // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
   1.527 -    // Move with sign extension can be used since they are positive.
   1.528 -    __ movslq(src_pos, src_pos);
   1.529 -    __ movslq(dst_pos, dst_pos);
   1.530 -
   1.531 -    BLOCK_COMMENT("arraycopy_range_checks done");
   1.532 -  }
   1.533 -
   1.534 -  //
   1.535 -  //  Generate generic array copy stubs
   1.536 -  //
   1.537 -  //  Input:
   1.538 -  //    c_rarg0    -  src oop
   1.539 -  //    c_rarg1    -  src_pos (32-bits)
   1.540 -  //    c_rarg2    -  dst oop
   1.541 -  //    c_rarg3    -  dst_pos (32-bits)
   1.542 -  // not Win64
   1.543 -  //    c_rarg4    -  element count (32-bits)
   1.544 -  // Win64
   1.545 -  //    rsp+40     -  element count (32-bits)
   1.546 -  //
   1.547 -  //  Output:
   1.548 -  //    rax ==  0  -  success
   1.549 -  //    rax == -1^K - failure, where K is partial transfer count
   1.550 -  //
   1.551 -  address generate_generic_copy(const char *name) {
   1.552 -
   1.553 -    Label L_failed, L_failed_0, L_objArray;
   1.554 -    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
   1.555 -
   1.556 -    // Input registers
   1.557 -    const Register src        = c_rarg0;  // source array oop
   1.558 -    const Register src_pos    = c_rarg1;  // source position
   1.559 -    const Register dst        = c_rarg2;  // destination array oop
   1.560 -    const Register dst_pos    = c_rarg3;  // destination position
   1.561 -    // elements count is on stack on Win64
   1.562 -#ifdef _WIN64
   1.563 -#define C_RARG4 Address(rsp, 6 * wordSize)
   1.564 -#else
   1.565 -#define C_RARG4 c_rarg4
   1.566 -#endif
   1.567 -
   1.568 -    { int modulus = CodeEntryAlignment;
   1.569 -      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
   1.570 -      int advance = target - (__ offset() % modulus);
   1.571 -      if (advance < 0)  advance += modulus;
   1.572 -      if (advance > 0)  __ nop(advance);
   1.573 -    }
   1.574 -    StubCodeMark mark(this, "StubRoutines", name);
   1.575 -
   1.576 -    // Short-hop target to L_failed.  Makes for denser prologue code.
   1.577 -    __ BIND(L_failed_0);
   1.578 -    __ jmp(L_failed);
   1.579 -    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
   1.580 -
   1.581 -    __ align(CodeEntryAlignment);
   1.582 -    address start = __ pc();
   1.583 -
   1.584 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   1.585 -
   1.586 -    // bump this on entry, not on exit:
   1.587 -    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
   1.588 -
   1.589 -    //-----------------------------------------------------------------------
   1.590 -    // Assembler stub will be used for this call to arraycopy
   1.591 -    // if the following conditions are met:
   1.592 -    //
   1.593 -    // (1) src and dst must not be null.
   1.594 -    // (2) src_pos must not be negative.
   1.595 -    // (3) dst_pos must not be negative.
   1.596 -    // (4) length  must not be negative.
   1.597 -    // (5) src klass and dst klass should be the same and not NULL.
   1.598 -    // (6) src and dst should be arrays.
   1.599 -    // (7) src_pos + length must not exceed length of src.
   1.600 -    // (8) dst_pos + length must not exceed length of dst.
   1.601 -    //
   1.602 -
   1.603 -    //  if (src == NULL) return -1;
   1.604 -    __ testptr(src, src);         // src oop
   1.605 -    size_t j1off = __ offset();
   1.606 -    __ jccb(Assembler::zero, L_failed_0);
   1.607 -
   1.608 -    //  if (src_pos < 0) return -1;
   1.609 -    __ testl(src_pos, src_pos); // src_pos (32-bits)
   1.610 -    __ jccb(Assembler::negative, L_failed_0);
   1.611 -
   1.612 -    //  if (dst == NULL) return -1;
   1.613 -    __ testptr(dst, dst);         // dst oop
   1.614 -    __ jccb(Assembler::zero, L_failed_0);
   1.615 -
   1.616 -    //  if (dst_pos < 0) return -1;
   1.617 -    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
   1.618 -    size_t j4off = __ offset();
   1.619 -    __ jccb(Assembler::negative, L_failed_0);
   1.620 -
   1.621 -    // The first four tests are very dense code,
   1.622 -    // but not quite dense enough to put four
   1.623 -    // jumps in a 16-byte instruction fetch buffer.
    1.624 -    // That's good, because some branch predictors
   1.625 -    // do not like jumps so close together.
   1.626 -    // Make sure of this.
   1.627 -    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
   1.628 -
   1.629 -    // registers used as temp
   1.630 -    const Register r11_length    = r11; // elements count to copy
   1.631 -    const Register r10_src_klass = r10; // array klass
   1.632 -    const Register r9_dst_klass  = r9;  // dest array klass
   1.633 -
   1.634 -    //  if (length < 0) return -1;
   1.635 -    __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
   1.636 -    __ testl(r11_length, r11_length);
   1.637 -    __ jccb(Assembler::negative, L_failed_0);
   1.638 -
   1.639 -    __ load_klass(r10_src_klass, src);
   1.640 -#ifdef ASSERT
   1.641 -    //  assert(src->klass() != NULL);
   1.642 -    BLOCK_COMMENT("assert klasses not null");
   1.643 -    { Label L1, L2;
   1.644 -      __ testptr(r10_src_klass, r10_src_klass);
   1.645 -      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
   1.646 -      __ bind(L1);
   1.647 -      __ stop("broken null klass");
   1.648 -      __ bind(L2);
   1.649 -      __ load_klass(r9_dst_klass, dst);
   1.650 -      __ cmpq(r9_dst_klass, 0);
   1.651 -      __ jcc(Assembler::equal, L1);     // this would be broken also
   1.652 -      BLOCK_COMMENT("assert done");
   1.653 -    }
   1.654 -#endif
   1.655 -
   1.656 -    // Load layout helper (32-bits)
   1.657 -    //
   1.658 -    //  |array_tag|     | header_size | element_type |     |log2_element_size|
   1.659 -    // 32        30    24            16              8     2                 0
   1.660 -    //
   1.661 -    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
   1.662 -    //
   1.663 -
   1.664 -    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
   1.665 -                    Klass::layout_helper_offset_in_bytes();
   1.666 -
   1.667 -    const Register rax_lh = rax;  // layout helper
   1.668 -
   1.669 -    __ movl(rax_lh, Address(r10_src_klass, lh_offset));
   1.670 -
   1.671 -    // Handle objArrays completely differently...
   1.672 -    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
   1.673 -    __ cmpl(rax_lh, objArray_lh);
   1.674 -    __ jcc(Assembler::equal, L_objArray);
   1.675 -
   1.676 -    //  if (src->klass() != dst->klass()) return -1;
   1.677 -    __ load_klass(r9_dst_klass, dst);
   1.678 -    __ cmpq(r10_src_klass, r9_dst_klass);
   1.679 -    __ jcc(Assembler::notEqual, L_failed);
   1.680 -
   1.681 -    //  if (!src->is_Array()) return -1;
   1.682 -    __ cmpl(rax_lh, Klass::_lh_neutral_value);
   1.683 -    __ jcc(Assembler::greaterEqual, L_failed);
   1.684 -
   1.685 -    // At this point, it is known to be a typeArray (array_tag 0x3).
   1.686 -#ifdef ASSERT
   1.687 -    { Label L;
   1.688 -      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
   1.689 -      __ jcc(Assembler::greaterEqual, L);
   1.690 -      __ stop("must be a primitive array");
   1.691 -      __ bind(L);
   1.692 -    }
   1.693 -#endif
   1.694 -
   1.695 -    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
   1.696 -                           r10, L_failed);
   1.697 -
   1.698 -    // typeArrayKlass
   1.699 -    //
   1.700 -    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
   1.701 -    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
   1.702 -    //
   1.703 -
   1.704 -    const Register r10_offset = r10;    // array offset
   1.705 -    const Register rax_elsize = rax_lh; // element size
   1.706 -
   1.707 -    __ movl(r10_offset, rax_lh);
   1.708 -    __ shrl(r10_offset, Klass::_lh_header_size_shift);
   1.709 -    __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
   1.710 -    __ addptr(src, r10_offset);           // src array offset
   1.711 -    __ addptr(dst, r10_offset);           // dst array offset
   1.712 -    BLOCK_COMMENT("choose copy loop based on element size");
   1.713 -    __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
   1.714 -
   1.715 -    // next registers should be set before the jump to corresponding stub
   1.716 -    const Register from     = c_rarg0;  // source array address
   1.717 -    const Register to       = c_rarg1;  // destination array address
   1.718 -    const Register count    = c_rarg2;  // elements count
   1.719 -
   1.720 -    // 'from', 'to', 'count' registers should be set in such order
   1.721 -    // since they are the same as 'src', 'src_pos', 'dst'.
   1.722 -
   1.723 -  __ BIND(L_copy_bytes);
   1.724 -    __ cmpl(rax_elsize, 0);
   1.725 -    __ jccb(Assembler::notEqual, L_copy_shorts);
   1.726 -    __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
   1.727 -    __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
   1.728 -    __ movl2ptr(count, r11_length); // length
   1.729 -    __ jump(RuntimeAddress(byte_copy_entry));
   1.730 -
   1.731 -  __ BIND(L_copy_shorts);
   1.732 -    __ cmpl(rax_elsize, LogBytesPerShort);
   1.733 -    __ jccb(Assembler::notEqual, L_copy_ints);
   1.734 -    __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
   1.735 -    __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
   1.736 -    __ movl2ptr(count, r11_length); // length
   1.737 -    __ jump(RuntimeAddress(short_copy_entry));
   1.738 -
   1.739 -  __ BIND(L_copy_ints);
   1.740 -    __ cmpl(rax_elsize, LogBytesPerInt);
   1.741 -    __ jccb(Assembler::notEqual, L_copy_longs);
   1.742 -    __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
   1.743 -    __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
   1.744 -    __ movl2ptr(count, r11_length); // length
   1.745 -    __ jump(RuntimeAddress(int_copy_entry));
   1.746 -
   1.747 -  __ BIND(L_copy_longs);
   1.748 -#ifdef ASSERT
   1.749 -    { Label L;
   1.750 -      __ cmpl(rax_elsize, LogBytesPerLong);
   1.751 -      __ jcc(Assembler::equal, L);
   1.752 -      __ stop("must be long copy, but elsize is wrong");
   1.753 -      __ bind(L);
   1.754 -    }
   1.755 -#endif
   1.756 -    __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
   1.757 -    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
   1.758 -    __ movl2ptr(count, r11_length); // length
   1.759 -    __ jump(RuntimeAddress(long_copy_entry));
   1.760 -
   1.761 -    // objArrayKlass
   1.762 -  __ BIND(L_objArray);
   1.763 -    // live at this point:  r10_src_klass, src[_pos], dst[_pos]
   1.764 -
   1.765 -    Label L_plain_copy, L_checkcast_copy;
   1.766 -    //  test array classes for subtyping
   1.767 -    __ load_klass(r9_dst_klass, dst);
   1.768 -    __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
   1.769 -    __ jcc(Assembler::notEqual, L_checkcast_copy);
   1.770 -
   1.771 -    // Identically typed arrays can be copied without element-wise checks.
   1.772 -    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
   1.773 -                           r10, L_failed);
   1.774 -
   1.775 -    __ lea(from, Address(src, src_pos, TIMES_OOP,
   1.776 -                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
   1.777 -    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
   1.778 -                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
   1.779 -    __ movl2ptr(count, r11_length); // length
   1.780 -  __ BIND(L_plain_copy);
   1.781 -    __ jump(RuntimeAddress(oop_copy_entry));
   1.782 -
   1.783 -  __ BIND(L_checkcast_copy);
   1.784 -    // live at this point:  r10_src_klass, !r11_length
   1.785 -    {
   1.786 -      // assert(r11_length == C_RARG4); // will reload from here
   1.787 -      Register r11_dst_klass = r11;
   1.788 -      __ load_klass(r11_dst_klass, dst);
   1.789 -
   1.790 -      // Before looking at dst.length, make sure dst is also an objArray.
   1.791 -      __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
   1.792 -      __ jcc(Assembler::notEqual, L_failed);
   1.793 -
   1.794 -      // It is safe to examine both src.length and dst.length.
   1.795 -#ifndef _WIN64
   1.796 -      arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
   1.797 -                             rax, L_failed);
   1.798 -#else
   1.799 -      __ movl(r11_length, C_RARG4);     // reload
   1.800 -      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
   1.801 -                             rax, L_failed);
   1.802 -      __ load_klass(r11_dst_klass, dst); // reload
   1.803 -#endif
   1.804 -
   1.805 -      // Marshal the base address arguments now, freeing registers.
   1.806 -      __ lea(from, Address(src, src_pos, TIMES_OOP,
   1.807 -                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.808 -      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
   1.809 -                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
   1.810 -      __ movl(count, C_RARG4);          // length (reloaded)
   1.811 -      Register sco_temp = c_rarg3;      // this register is free now
   1.812 -      assert_different_registers(from, to, count, sco_temp,
   1.813 -                                 r11_dst_klass, r10_src_klass);
   1.814 -      assert_clean_int(count, sco_temp);
   1.815 -
   1.816 -      // Generate the type check.
   1.817 -      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
   1.818 -                        Klass::super_check_offset_offset_in_bytes());
   1.819 -      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
   1.820 -      assert_clean_int(sco_temp, rax);
   1.821 -      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
   1.822 -
   1.823 -      // Fetch destination element klass from the objArrayKlass header.
   1.824 -      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
   1.825 -                       objArrayKlass::element_klass_offset_in_bytes());
   1.826 -      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
   1.827 -      __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
   1.828 -      assert_clean_int(sco_temp, rax);
   1.829 -
   1.830 -      // the checkcast_copy loop needs two extra arguments:
   1.831 -      assert(c_rarg3 == sco_temp, "#3 already in place");
   1.832 -      __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
   1.833 -      __ jump(RuntimeAddress(checkcast_copy_entry));
   1.834 -    }
   1.835 -
   1.836 -  __ BIND(L_failed);
   1.837 -    __ xorptr(rax, rax);
   1.838 -    __ notptr(rax); // return -1
   1.839 -    __ leave();   // required for proper stackwalking of RuntimeStub frame
   1.840 -    __ ret(0);
   1.841 -
   1.842 -    return start;
   1.843 -  }
   1.844 -
   1.845 -#undef length_arg
   1.846 -#endif
   1.847  
   1.848  //FIXME
   1.849    address generate_disjoint_long_copy(bool aligned, const char *name) {

mercurial