Mon, 26 Sep 2016 14:21:21 +0800
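Add `__ sync()` to the oop path of generate_conjoint_long_oop_copy and refine the stub (drop unused labels and remove dead commented-out and `#if 0` code).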
src/cpu/mips/vm/stubGenerator_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Mon Sep 26 14:01:22 2016 +0800 1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Mon Sep 26 14:21:21 2016 +0800 1.3 @@ -1412,7 +1412,7 @@ 1.4 // cache line boundaries will still be loaded and stored atomicly. 1.5 // 1.6 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1.7 - Label l_2, l_3, l_4, l_stchk; 1.8 + Label l_2, l_4; 1.9 StubCodeMark mark(this, "StubRoutines", name); 1.10 __ align(CodeEntryAlignment); 1.11 address start = __ pc(); 1.12 @@ -1439,67 +1439,37 @@ 1.13 __ move(T3, A0); 1.14 __ move(T0, A1); 1.15 1.16 - //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 1.17 + if (is_oop) { 1.18 + __ sync(); 1.19 + } 1.20 + 1.21 __ sll(AT, T1, Address::times_8); 1.22 __ add(AT, T3, AT); 1.23 __ lea(T3 , Address(AT, -8)); 1.24 - //__ std(); 1.25 - //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 1.26 __ sll(AT, T1, Address::times_8); 1.27 __ add(AT, T0, AT); 1.28 __ lea(T0 , Address(AT, -8)); 1.29 1.30 - // __ cmpl(ecx, 32); 1.31 - // __ jcc(Assembler::above, l_3); // > 32 dwords 1.32 - // __ testl(ecx, ecx); 1.33 - //__ jcc(Assembler::zero, l_4); 1.34 __ beq(T1, R0, l_4); 1.35 __ delayed()->nop(); 1.36 - // __ subl(edi, esi); 1.37 + 1.38 __ align(16); 1.39 __ bind(l_2); 1.40 - // __ movl(edx, Address(esi)); 1.41 __ ld(AT, T3, 0); 1.42 - // __ movl(Address(esi, edi, Address::times_1), edx); 1.43 __ sd(AT, T0, 0); 1.44 - // __ subl(esi, 4); 1.45 __ addi(T3, T3, -8); 1.46 __ addi(T0, T0, -8); 1.47 - // __ decl(ecx); 1.48 __ addi(T1, T1, -1); 1.49 - //__ jcc(Assembler::notEqual, l_2); 1.50 __ bne(T1, R0, l_2); 1.51 __ delayed()->nop(); 1.52 + 1.53 if (is_oop) { 1.54 - // __ jmp(l_stchk); 1.55 - __ b( l_stchk); 1.56 - __ delayed()->nop(); 1.57 + __ move(T0, A1); 1.58 + __ move(T1, A2); 1.59 + array_store_check(); 1.60 + __ sync(); 1.61 } 1.62 __ bind(l_4); 1.63 - // __ cld(); 1.64 - // __ popl(edi); 1.65 - // __ popl(esi); 
1.66 - // __ ret(0); 1.67 - __ pop(T8); 1.68 - __ pop(T1); 1.69 - __ pop(T0); 1.70 - __ pop(T3); 1.71 - __ jr(RA); 1.72 - __ delayed()->nop(); 1.73 - __ bind(l_3); 1.74 - // __ rep_movl(); 1.75 - if (is_oop) { 1.76 - __ bind(l_stchk); 1.77 - // __ movl(edi, Address(esp, 8+ 8)); 1.78 - __ move(T0, A1); 1.79 - // __ movl(ecx, Address(esp, 8+ 12)); 1.80 - __ move(T1, A2); 1.81 - array_store_check(); 1.82 - } 1.83 - // __ cld(); 1.84 - // __ popl(edi); 1.85 - // __ popl(esi); 1.86 - // __ ret(0); 1.87 __ pop(T8); 1.88 __ pop(T1); 1.89 __ pop(T0); 1.90 @@ -1508,759 +1478,6 @@ 1.91 __ delayed()->nop(); 1.92 return start; 1.93 } 1.94 -#if 0 1.95 - // Arguments: 1.96 - // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 1.97 - // ignored 1.98 - // is_oop - true => oop array, so generate store check code 1.99 - // name - stub name string 1.100 - // 1.101 - // Inputs: 1.102 - // c_rarg0 - source array address 1.103 - // c_rarg1 - destination array address 1.104 - // c_rarg2 - element count, treated as ssize_t, can be zero 1.105 - // 1.106 - address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1.107 - __ align(CodeEntryAlignment); 1.108 - StubCodeMark mark(this, "StubRoutines", name); 1.109 - address start = __ pc(); 1.110 - 1.111 - Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 1.112 - const Register from = rdi; // source array address 1.113 - const Register to = rsi; // destination array address 1.114 - const Register qword_count = rdx; // elements count 1.115 - const Register saved_count = rcx; 1.116 - 1.117 - __ enter(); // required for proper stackwalking of RuntimeStub frame 1.118 - assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 
1.119 - 1.120 - address disjoint_copy_entry = NULL; 1.121 - if (is_oop) { 1.122 - assert(!UseCompressedOops, "shouldn't be called for compressed oops"); 1.123 - disjoint_copy_entry = disjoint_oop_copy_entry; 1.124 - oop_copy_entry = __ pc(); 1.125 - array_overlap_test(disjoint_oop_copy_entry, Address::times_8); 1.126 - } else { 1.127 - disjoint_copy_entry = disjoint_long_copy_entry; 1.128 - long_copy_entry = __ pc(); 1.129 - array_overlap_test(disjoint_long_copy_entry, Address::times_8); 1.130 - } 1.131 - BLOCK_COMMENT("Entry:"); 1.132 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1.133 - 1.134 - array_overlap_test(disjoint_copy_entry, Address::times_8); 1.135 - setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1.136 - // r9 and r10 may be used to save non-volatile registers 1.137 - 1.138 - // 'from', 'to' and 'qword_count' are now valid 1.139 - 1.140 - if (is_oop) { 1.141 - // Save to and count for store barrier 1.142 - __ movptr(saved_count, qword_count); 1.143 - // No registers are destroyed by this call 1.144 - gen_write_ref_array_pre_barrier(to, saved_count); 1.145 - } 1.146 - 1.147 - __ jmp(L_copy_32_bytes); 1.148 - 1.149 - // Copy trailing qwords 1.150 - __ BIND(L_copy_8_bytes); 1.151 - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 1.152 - __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1.153 - __ decrement(qword_count); 1.154 - __ jcc(Assembler::notZero, L_copy_8_bytes); 1.155 - 1.156 - if (is_oop) { 1.157 - __ jmp(L_exit); 1.158 - } else { 1.159 - inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 1.160 - restore_arg_regs(); 1.161 - __ xorptr(rax, rax); // return 0 1.162 - __ leave(); // required for proper stackwalking of RuntimeStub frame 1.163 - __ ret(0); 1.164 - } 1.165 - 1.166 - // Copy in 32-bytes chunks 1.167 - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1.168 - 1.169 - if (is_oop) { 1.170 - __ BIND(L_exit); 1.171 - __ lea(rcx, Address(to, 
saved_count, Address::times_8, -8)); 1.172 - gen_write_ref_array_post_barrier(to, rcx, rax); 1.173 - inc_counter_np(SharedRuntime::_oop_array_copy_ctr); 1.174 - } else { 1.175 - inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 1.176 - } 1.177 - restore_arg_regs(); 1.178 - __ xorptr(rax, rax); // return 0 1.179 - __ leave(); // required for proper stackwalking of RuntimeStub frame 1.180 - __ ret(0); 1.181 - 1.182 - return start; 1.183 - } 1.184 - 1.185 - 1.186 - // Helper for generating a dynamic type check. 1.187 - // Smashes no registers. 1.188 - void generate_type_check(Register sub_klass, 1.189 - Register super_check_offset, 1.190 - Register super_klass, 1.191 - Label& L_success) { 1.192 - assert_different_registers(sub_klass, super_check_offset, super_klass); 1.193 - 1.194 - BLOCK_COMMENT("type_check:"); 1.195 - 1.196 - Label L_miss; 1.197 - 1.198 - // a couple of useful fields in sub_klass: 1.199 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 1.200 - Klass::secondary_supers_offset_in_bytes()); 1.201 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 1.202 - Klass::secondary_super_cache_offset_in_bytes()); 1.203 - Address secondary_supers_addr(sub_klass, ss_offset); 1.204 - Address super_cache_addr( sub_klass, sc_offset); 1.205 - 1.206 - // if the pointers are equal, we are done (e.g., String[] elements) 1.207 - __ cmpptr(super_klass, sub_klass); 1.208 - __ jcc(Assembler::equal, L_success); 1.209 - 1.210 - // check the supertype display: 1.211 - Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 1.212 - __ cmpptr(super_klass, super_check_addr); // test the super type 1.213 - __ jcc(Assembler::equal, L_success); 1.214 - 1.215 - // if it was a primary super, we can just fail immediately 1.216 - __ cmpl(super_check_offset, sc_offset); 1.217 - __ jcc(Assembler::notEqual, L_miss); 1.218 - 1.219 - // Now do a linear scan of the secondary super-klass chain. 
1.220 - // The repne_scan instruction uses fixed registers, which we must spill. 1.221 - // (We need a couple more temps in any case.) 1.222 - // This code is rarely used, so simplicity is a virtue here. 1.223 - inc_counter_np(SharedRuntime::_partial_subtype_ctr); 1.224 - { 1.225 - __ push(rax); 1.226 - __ push(rcx); 1.227 - __ push(rdi); 1.228 - assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); 1.229 - 1.230 - __ movptr(rdi, secondary_supers_addr); 1.231 - // Load the array length. 1.232 - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 1.233 - // Skip to start of data. 1.234 - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 1.235 - // Scan rcx words at [rdi] for occurance of rax 1.236 - // Set NZ/Z based on last compare 1.237 - __ movptr(rax, super_klass); 1.238 - if (UseCompressedOops) { 1.239 - // Compare against compressed form. Don't need to uncompress because 1.240 - // looks like orig rax is restored in popq below. 1.241 - __ encode_heap_oop(rax); 1.242 - __ repne_scanl(); 1.243 - } else { 1.244 - __ repne_scan(); 1.245 - } 1.246 - 1.247 - // Unspill the temp. registers: 1.248 - __ pop(rdi); 1.249 - __ pop(rcx); 1.250 - __ pop(rax); 1.251 - 1.252 - __ jcc(Assembler::notEqual, L_miss); 1.253 - } 1.254 - 1.255 - // Success. Cache the super we found and proceed in triumph. 1.256 - __ movptr(super_cache_addr, super_klass); // note: rax is dead 1.257 - __ jmp(L_success); 1.258 - 1.259 - // Fall through on failure! 
1.260 - __ BIND(L_miss); 1.261 - } 1.262 - 1.263 - // 1.264 - // Generate checkcasting array copy stub 1.265 - // 1.266 - // Input: 1.267 - // c_rarg0 - source array address 1.268 - // c_rarg1 - destination array address 1.269 - // c_rarg2 - element count, treated as ssize_t, can be zero 1.270 - // c_rarg3 - size_t ckoff (super_check_offset) 1.271 - // not Win64 1.272 - // c_rarg4 - oop ckval (super_klass) 1.273 - // Win64 1.274 - // rsp+40 - oop ckval (super_klass) 1.275 - // 1.276 - // Output: 1.277 - // rax == 0 - success 1.278 - // rax == -1^K - failure, where K is partial transfer count 1.279 - // 1.280 - address generate_checkcast_copy(const char *name) { 1.281 - 1.282 - Label L_load_element, L_store_element, L_do_card_marks, L_done; 1.283 - 1.284 - // Input registers (after setup_arg_regs) 1.285 - const Register from = rdi; // source array address 1.286 - const Register to = rsi; // destination array address 1.287 - const Register length = rdx; // elements count 1.288 - const Register ckoff = rcx; // super_check_offset 1.289 - const Register ckval = r8; // super_klass 1.290 - 1.291 - // Registers used as temps (r13, r14 are save-on-entry) 1.292 - const Register end_from = from; // source array end address 1.293 - const Register end_to = r13; // destination array end address 1.294 - const Register count = rdx; // -(count_remaining) 1.295 - const Register r14_length = r14; // saved copy of length 1.296 - // End pointers are inclusive, and if length is not zero they point 1.297 - // to the last unit copied: end_to[0] := end_from[0] 1.298 - 1.299 - const Register rax_oop = rax; // actual oop copied 1.300 - const Register r11_klass = r11; // oop._klass 1.301 - 1.302 - //--------------------------------------------------------------- 1.303 - // Assembler stub will be used for this call to arraycopy 1.304 - // if the two arrays are subtypes of Object[] but the 1.305 - // destination array type is not equal to or a supertype 1.306 - // of the source type. 
Each element must be separately 1.307 - // checked. 1.308 - 1.309 - __ align(CodeEntryAlignment); 1.310 - StubCodeMark mark(this, "StubRoutines", name); 1.311 - address start = __ pc(); 1.312 - 1.313 - __ enter(); // required for proper stackwalking of RuntimeStub frame 1.314 - 1.315 - checkcast_copy_entry = __ pc(); 1.316 - BLOCK_COMMENT("Entry:"); 1.317 - 1.318 -#ifdef ASSERT 1.319 - // caller guarantees that the arrays really are different 1.320 - // otherwise, we would have to make conjoint checks 1.321 - { Label L; 1.322 - array_overlap_test(L, TIMES_OOP); 1.323 - __ stop("checkcast_copy within a single array"); 1.324 - __ bind(L); 1.325 - } 1.326 -#endif //ASSERT 1.327 - 1.328 - // allocate spill slots for r13, r14 1.329 - enum { 1.330 - saved_r13_offset, 1.331 - saved_r14_offset, 1.332 - saved_rbp_offset, 1.333 - saved_rip_offset, 1.334 - saved_rarg0_offset 1.335 - }; 1.336 - __ subptr(rsp, saved_rbp_offset * wordSize); 1.337 - __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 1.338 - __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 1.339 - setup_arg_regs(4); // from => rdi, to => rsi, length => rdx 1.340 - // ckoff => rcx, ckval => r8 1.341 - // r9 and r10 may be used to save non-volatile registers 1.342 -#ifdef _WIN64 1.343 - // last argument (#4) is on stack on Win64 1.344 - const int ckval_offset = saved_rarg0_offset + 4; 1.345 - __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); 1.346 -#endif 1.347 - 1.348 - // check that int operands are properly extended to size_t 1.349 - assert_clean_int(length, rax); 1.350 - assert_clean_int(ckoff, rax); 1.351 - 1.352 -#ifdef ASSERT 1.353 - BLOCK_COMMENT("assert consistent ckoff/ckval"); 1.354 - // The ckoff and ckval must be mutually consistent, 1.355 - // even though caller generates both. 
1.356 - { Label L; 1.357 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 1.358 - Klass::super_check_offset_offset_in_bytes()); 1.359 - __ cmpl(ckoff, Address(ckval, sco_offset)); 1.360 - __ jcc(Assembler::equal, L); 1.361 - __ stop("super_check_offset inconsistent"); 1.362 - __ bind(L); 1.363 - } 1.364 -#endif //ASSERT 1.365 - 1.366 - // Loop-invariant addresses. They are exclusive end pointers. 1.367 - Address end_from_addr(from, length, TIMES_OOP, 0); 1.368 - Address end_to_addr(to, length, TIMES_OOP, 0); 1.369 - // Loop-variant addresses. They assume post-incremented count < 0. 1.370 - Address from_element_addr(end_from, count, TIMES_OOP, 0); 1.371 - Address to_element_addr(end_to, count, TIMES_OOP, 0); 1.372 - 1.373 - gen_write_ref_array_pre_barrier(to, count); 1.374 - 1.375 - // Copy from low to high addresses, indexed from the end of each array. 1.376 - __ lea(end_from, end_from_addr); 1.377 - __ lea(end_to, end_to_addr); 1.378 - __ movptr(r14_length, length); // save a copy of the length 1.379 - assert(length == count, ""); // else fix next line: 1.380 - __ negptr(count); // negate and test the length 1.381 - __ jcc(Assembler::notZero, L_load_element); 1.382 - 1.383 - // Empty array: Nothing to do. 1.384 - __ xorptr(rax, rax); // return 0 on (trivial) success 1.385 - __ jmp(L_done); 1.386 - 1.387 - // ======== begin loop ======== 1.388 - // (Loop is rotated; its entry is L_load_element.) 1.389 - // Loop control: 1.390 - // for (count = -count; count != 0; count++) 1.391 - // Base pointers src, dst are biased by 8*(count-1),to last element. 
1.392 - __ align(16); 1.393 - 1.394 - __ BIND(L_store_element); 1.395 - __ store_heap_oop(rax_oop, to_element_addr); // store the oop 1.396 - __ sync(); 1.397 - __ increment(count); // increment the count toward zero 1.398 - __ jcc(Assembler::zero, L_do_card_marks); 1.399 - 1.400 - // ======== loop entry is here ======== 1.401 - __ BIND(L_load_element); 1.402 - __ load_heap_oop(rax_oop, from_element_addr); // load the oop 1.403 - __ testptr(rax_oop, rax_oop); 1.404 - __ jcc(Assembler::zero, L_store_element); 1.405 - 1.406 - __ load_klass(r11_klass, rax_oop);// query the object klass 1.407 - generate_type_check(r11_klass, ckoff, ckval, L_store_element); 1.408 - // ======== end loop ======== 1.409 - 1.410 - // It was a real error; we must depend on the caller to finish the job. 1.411 - // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. 1.412 - // Emit GC store barriers for the oops we have copied (r14 + rdx), 1.413 - // and report their number to the caller. 1.414 - assert_different_registers(rax, r14_length, count, to, end_to, rcx); 1.415 - __ lea(end_to, to_element_addr); 1.416 - gen_write_ref_array_post_barrier(to, end_to, rscratch1); 1.417 - __ movptr(rax, r14_length); // original oops 1.418 - __ addptr(rax, count); // K = (original - remaining) oops 1.419 - __ notptr(rax); // report (-1^K) to caller 1.420 - __ jmp(L_done); 1.421 - 1.422 - // Come here on success only. 1.423 - __ BIND(L_do_card_marks); 1.424 - __ addptr(end_to, -wordSize); // make an inclusive end pointer 1.425 - gen_write_ref_array_post_barrier(to, end_to, rscratch1); 1.426 - __ xorptr(rax, rax); // return 0 on success 1.427 - 1.428 - // Common exit point (success or failure). 
1.429 - __ BIND(L_done); 1.430 - __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); 1.431 - __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); 1.432 - inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); 1.433 - restore_arg_regs(); 1.434 - __ leave(); // required for proper stackwalking of RuntimeStub frame 1.435 - __ ret(0); 1.436 - 1.437 - return start; 1.438 - } 1.439 - 1.440 - // 1.441 - // Generate 'unsafe' array copy stub 1.442 - // Though just as safe as the other stubs, it takes an unscaled 1.443 - // size_t argument instead of an element count. 1.444 - // 1.445 - // Input: 1.446 - // c_rarg0 - source array address 1.447 - // c_rarg1 - destination array address 1.448 - // c_rarg2 - byte count, treated as ssize_t, can be zero 1.449 - // 1.450 - // Examines the alignment of the operands and dispatches 1.451 - // to a long, int, short, or byte copy loop. 1.452 - // 1.453 - address generate_unsafe_copy(const char *name) { 1.454 - 1.455 - Label L_long_aligned, L_int_aligned, L_short_aligned; 1.456 - 1.457 - // Input registers (before setup_arg_regs) 1.458 - const Register from = c_rarg0; // source array address 1.459 - const Register to = c_rarg1; // destination array address 1.460 - const Register size = c_rarg2; // byte count (size_t) 1.461 - 1.462 - // Register used as a temp 1.463 - const Register bits = rax; // test copy of low bits 1.464 - 1.465 - __ align(CodeEntryAlignment); 1.466 - StubCodeMark mark(this, "StubRoutines", name); 1.467 - address start = __ pc(); 1.468 - 1.469 - __ enter(); // required for proper stackwalking of RuntimeStub frame 1.470 - 1.471 - // bump this on entry, not on exit: 1.472 - inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); 1.473 - 1.474 - __ mov(bits, from); 1.475 - __ orptr(bits, to); 1.476 - __ orptr(bits, size); 1.477 - 1.478 - __ testb(bits, BytesPerLong-1); 1.479 - __ jccb(Assembler::zero, L_long_aligned); 1.480 - 1.481 - __ testb(bits, BytesPerInt-1); 1.482 - __ jccb(Assembler::zero, 
L_int_aligned); 1.483 - 1.484 - __ testb(bits, BytesPerShort-1); 1.485 - __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); 1.486 - 1.487 - __ BIND(L_short_aligned); 1.488 - __ shrptr(size, LogBytesPerShort); // size => short_count 1.489 - __ jump(RuntimeAddress(short_copy_entry)); 1.490 - 1.491 - __ BIND(L_int_aligned); 1.492 - __ shrptr(size, LogBytesPerInt); // size => int_count 1.493 - __ jump(RuntimeAddress(int_copy_entry)); 1.494 - 1.495 - __ BIND(L_long_aligned); 1.496 - __ shrptr(size, LogBytesPerLong); // size => qword_count 1.497 - __ jump(RuntimeAddress(long_copy_entry)); 1.498 - 1.499 - return start; 1.500 - } 1.501 - 1.502 - // Perform range checks on the proposed arraycopy. 1.503 - // Kills temp, but nothing else. 1.504 - // Also, clean the sign bits of src_pos and dst_pos. 1.505 - void arraycopy_range_checks(Register src, // source array oop (c_rarg0) 1.506 - Register src_pos, // source position (c_rarg1) 1.507 - Register dst, // destination array oo (c_rarg2) 1.508 - Register dst_pos, // destination position (c_rarg3) 1.509 - Register length, 1.510 - Register temp, 1.511 - Label& L_failed) { 1.512 - BLOCK_COMMENT("arraycopy_range_checks:"); 1.513 - 1.514 - // if (src_pos + length > arrayOop(src)->length()) FAIL; 1.515 - __ movl(temp, length); 1.516 - __ addl(temp, src_pos); // src_pos + length 1.517 - __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); 1.518 - __ jcc(Assembler::above, L_failed); 1.519 - 1.520 - // if (dst_pos + length > arrayOop(dst)->length()) FAIL; 1.521 - __ movl(temp, length); 1.522 - __ addl(temp, dst_pos); // dst_pos + length 1.523 - __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); 1.524 - __ jcc(Assembler::above, L_failed); 1.525 - 1.526 - // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 1.527 - // Move with sign extension can be used since they are positive. 
1.528 - __ movslq(src_pos, src_pos); 1.529 - __ movslq(dst_pos, dst_pos); 1.530 - 1.531 - BLOCK_COMMENT("arraycopy_range_checks done"); 1.532 - } 1.533 - 1.534 - // 1.535 - // Generate generic array copy stubs 1.536 - // 1.537 - // Input: 1.538 - // c_rarg0 - src oop 1.539 - // c_rarg1 - src_pos (32-bits) 1.540 - // c_rarg2 - dst oop 1.541 - // c_rarg3 - dst_pos (32-bits) 1.542 - // not Win64 1.543 - // c_rarg4 - element count (32-bits) 1.544 - // Win64 1.545 - // rsp+40 - element count (32-bits) 1.546 - // 1.547 - // Output: 1.548 - // rax == 0 - success 1.549 - // rax == -1^K - failure, where K is partial transfer count 1.550 - // 1.551 - address generate_generic_copy(const char *name) { 1.552 - 1.553 - Label L_failed, L_failed_0, L_objArray; 1.554 - Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; 1.555 - 1.556 - // Input registers 1.557 - const Register src = c_rarg0; // source array oop 1.558 - const Register src_pos = c_rarg1; // source position 1.559 - const Register dst = c_rarg2; // destination array oop 1.560 - const Register dst_pos = c_rarg3; // destination position 1.561 - // elements count is on stack on Win64 1.562 -#ifdef _WIN64 1.563 -#define C_RARG4 Address(rsp, 6 * wordSize) 1.564 -#else 1.565 -#define C_RARG4 c_rarg4 1.566 -#endif 1.567 - 1.568 - { int modulus = CodeEntryAlignment; 1.569 - int target = modulus - 5; // 5 = sizeof jmp(L_failed) 1.570 - int advance = target - (__ offset() % modulus); 1.571 - if (advance < 0) advance += modulus; 1.572 - if (advance > 0) __ nop(advance); 1.573 - } 1.574 - StubCodeMark mark(this, "StubRoutines", name); 1.575 - 1.576 - // Short-hop target to L_failed. Makes for denser prologue code. 
1.577 - __ BIND(L_failed_0); 1.578 - __ jmp(L_failed); 1.579 - assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); 1.580 - 1.581 - __ align(CodeEntryAlignment); 1.582 - address start = __ pc(); 1.583 - 1.584 - __ enter(); // required for proper stackwalking of RuntimeStub frame 1.585 - 1.586 - // bump this on entry, not on exit: 1.587 - inc_counter_np(SharedRuntime::_generic_array_copy_ctr); 1.588 - 1.589 - //----------------------------------------------------------------------- 1.590 - // Assembler stub will be used for this call to arraycopy 1.591 - // if the following conditions are met: 1.592 - // 1.593 - // (1) src and dst must not be null. 1.594 - // (2) src_pos must not be negative. 1.595 - // (3) dst_pos must not be negative. 1.596 - // (4) length must not be negative. 1.597 - // (5) src klass and dst klass should be the same and not NULL. 1.598 - // (6) src and dst should be arrays. 1.599 - // (7) src_pos + length must not exceed length of src. 1.600 - // (8) dst_pos + length must not exceed length of dst. 1.601 - // 1.602 - 1.603 - // if (src == NULL) return -1; 1.604 - __ testptr(src, src); // src oop 1.605 - size_t j1off = __ offset(); 1.606 - __ jccb(Assembler::zero, L_failed_0); 1.607 - 1.608 - // if (src_pos < 0) return -1; 1.609 - __ testl(src_pos, src_pos); // src_pos (32-bits) 1.610 - __ jccb(Assembler::negative, L_failed_0); 1.611 - 1.612 - // if (dst == NULL) return -1; 1.613 - __ testptr(dst, dst); // dst oop 1.614 - __ jccb(Assembler::zero, L_failed_0); 1.615 - 1.616 - // if (dst_pos < 0) return -1; 1.617 - __ testl(dst_pos, dst_pos); // dst_pos (32-bits) 1.618 - size_t j4off = __ offset(); 1.619 - __ jccb(Assembler::negative, L_failed_0); 1.620 - 1.621 - // The first four tests are very dense code, 1.622 - // but not quite dense enough to put four 1.623 - // jumps in a 16-byte instruction fetch buffer. 1.624 - // That's good, because some branch predicters 1.625 - // do not like jumps so close together. 
1.626 - // Make sure of this. 1.627 - guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); 1.628 - 1.629 - // registers used as temp 1.630 - const Register r11_length = r11; // elements count to copy 1.631 - const Register r10_src_klass = r10; // array klass 1.632 - const Register r9_dst_klass = r9; // dest array klass 1.633 - 1.634 - // if (length < 0) return -1; 1.635 - __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) 1.636 - __ testl(r11_length, r11_length); 1.637 - __ jccb(Assembler::negative, L_failed_0); 1.638 - 1.639 - __ load_klass(r10_src_klass, src); 1.640 -#ifdef ASSERT 1.641 - // assert(src->klass() != NULL); 1.642 - BLOCK_COMMENT("assert klasses not null"); 1.643 - { Label L1, L2; 1.644 - __ testptr(r10_src_klass, r10_src_klass); 1.645 - __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL 1.646 - __ bind(L1); 1.647 - __ stop("broken null klass"); 1.648 - __ bind(L2); 1.649 - __ load_klass(r9_dst_klass, dst); 1.650 - __ cmpq(r9_dst_klass, 0); 1.651 - __ jcc(Assembler::equal, L1); // this would be broken also 1.652 - BLOCK_COMMENT("assert done"); 1.653 - } 1.654 -#endif 1.655 - 1.656 - // Load layout helper (32-bits) 1.657 - // 1.658 - // |array_tag| | header_size | element_type | |log2_element_size| 1.659 - // 32 30 24 16 8 2 0 1.660 - // 1.661 - // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 1.662 - // 1.663 - 1.664 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 1.665 - Klass::layout_helper_offset_in_bytes(); 1.666 - 1.667 - const Register rax_lh = rax; // layout helper 1.668 - 1.669 - __ movl(rax_lh, Address(r10_src_klass, lh_offset)); 1.670 - 1.671 - // Handle objArrays completely differently... 
1.672 - jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 1.673 - __ cmpl(rax_lh, objArray_lh); 1.674 - __ jcc(Assembler::equal, L_objArray); 1.675 - 1.676 - // if (src->klass() != dst->klass()) return -1; 1.677 - __ load_klass(r9_dst_klass, dst); 1.678 - __ cmpq(r10_src_klass, r9_dst_klass); 1.679 - __ jcc(Assembler::notEqual, L_failed); 1.680 - 1.681 - // if (!src->is_Array()) return -1; 1.682 - __ cmpl(rax_lh, Klass::_lh_neutral_value); 1.683 - __ jcc(Assembler::greaterEqual, L_failed); 1.684 - 1.685 - // At this point, it is known to be a typeArray (array_tag 0x3). 1.686 -#ifdef ASSERT 1.687 - { Label L; 1.688 - __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); 1.689 - __ jcc(Assembler::greaterEqual, L); 1.690 - __ stop("must be a primitive array"); 1.691 - __ bind(L); 1.692 - } 1.693 -#endif 1.694 - 1.695 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 1.696 - r10, L_failed); 1.697 - 1.698 - // typeArrayKlass 1.699 - // 1.700 - // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); 1.701 - // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); 1.702 - // 1.703 - 1.704 - const Register r10_offset = r10; // array offset 1.705 - const Register rax_elsize = rax_lh; // element size 1.706 - 1.707 - __ movl(r10_offset, rax_lh); 1.708 - __ shrl(r10_offset, Klass::_lh_header_size_shift); 1.709 - __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset 1.710 - __ addptr(src, r10_offset); // src array offset 1.711 - __ addptr(dst, r10_offset); // dst array offset 1.712 - BLOCK_COMMENT("choose copy loop based on element size"); 1.713 - __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize 1.714 - 1.715 - // next registers should be set before the jump to corresponding stub 1.716 - const Register from = c_rarg0; // source array address 1.717 - const Register to = c_rarg1; // destination array address 1.718 - const Register count = c_rarg2; // 
elements count 1.719 - 1.720 - // 'from', 'to', 'count' registers should be set in such order 1.721 - // since they are the same as 'src', 'src_pos', 'dst'. 1.722 - 1.723 - __ BIND(L_copy_bytes); 1.724 - __ cmpl(rax_elsize, 0); 1.725 - __ jccb(Assembler::notEqual, L_copy_shorts); 1.726 - __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr 1.727 - __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr 1.728 - __ movl2ptr(count, r11_length); // length 1.729 - __ jump(RuntimeAddress(byte_copy_entry)); 1.730 - 1.731 - __ BIND(L_copy_shorts); 1.732 - __ cmpl(rax_elsize, LogBytesPerShort); 1.733 - __ jccb(Assembler::notEqual, L_copy_ints); 1.734 - __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr 1.735 - __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr 1.736 - __ movl2ptr(count, r11_length); // length 1.737 - __ jump(RuntimeAddress(short_copy_entry)); 1.738 - 1.739 - __ BIND(L_copy_ints); 1.740 - __ cmpl(rax_elsize, LogBytesPerInt); 1.741 - __ jccb(Assembler::notEqual, L_copy_longs); 1.742 - __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr 1.743 - __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr 1.744 - __ movl2ptr(count, r11_length); // length 1.745 - __ jump(RuntimeAddress(int_copy_entry)); 1.746 - 1.747 - __ BIND(L_copy_longs); 1.748 -#ifdef ASSERT 1.749 - { Label L; 1.750 - __ cmpl(rax_elsize, LogBytesPerLong); 1.751 - __ jcc(Assembler::equal, L); 1.752 - __ stop("must be long copy, but elsize is wrong"); 1.753 - __ bind(L); 1.754 - } 1.755 -#endif 1.756 - __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr 1.757 - __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr 1.758 - __ movl2ptr(count, r11_length); // length 1.759 - __ jump(RuntimeAddress(long_copy_entry)); 1.760 - 1.761 - // objArrayKlass 1.762 - __ BIND(L_objArray); 1.763 - // live at this point: r10_src_klass, src[_pos], dst[_pos] 1.764 - 1.765 - Label L_plain_copy, 
L_checkcast_copy; 1.766 - // test array classes for subtyping 1.767 - __ load_klass(r9_dst_klass, dst); 1.768 - __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality 1.769 - __ jcc(Assembler::notEqual, L_checkcast_copy); 1.770 - 1.771 - // Identically typed arrays can be copied without element-wise checks. 1.772 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 1.773 - r10, L_failed); 1.774 - 1.775 - __ lea(from, Address(src, src_pos, TIMES_OOP, 1.776 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr 1.777 - __ lea(to, Address(dst, dst_pos, TIMES_OOP, 1.778 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr 1.779 - __ movl2ptr(count, r11_length); // length 1.780 - __ BIND(L_plain_copy); 1.781 - __ jump(RuntimeAddress(oop_copy_entry)); 1.782 - 1.783 - __ BIND(L_checkcast_copy); 1.784 - // live at this point: r10_src_klass, !r11_length 1.785 - { 1.786 - // assert(r11_length == C_RARG4); // will reload from here 1.787 - Register r11_dst_klass = r11; 1.788 - __ load_klass(r11_dst_klass, dst); 1.789 - 1.790 - // Before looking at dst.length, make sure dst is also an objArray. 1.791 - __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); 1.792 - __ jcc(Assembler::notEqual, L_failed); 1.793 - 1.794 - // It is safe to examine both src.length and dst.length. 1.795 -#ifndef _WIN64 1.796 - arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4, 1.797 - rax, L_failed); 1.798 -#else 1.799 - __ movl(r11_length, C_RARG4); // reload 1.800 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 1.801 - rax, L_failed); 1.802 - __ load_klass(r11_dst_klass, dst); // reload 1.803 -#endif 1.804 - 1.805 - // Marshal the base address arguments now, freeing registers. 
1.806 - __ lea(from, Address(src, src_pos, TIMES_OOP, 1.807 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 1.808 - __ lea(to, Address(dst, dst_pos, TIMES_OOP, 1.809 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 1.810 - __ movl(count, C_RARG4); // length (reloaded) 1.811 - Register sco_temp = c_rarg3; // this register is free now 1.812 - assert_different_registers(from, to, count, sco_temp, 1.813 - r11_dst_klass, r10_src_klass); 1.814 - assert_clean_int(count, sco_temp); 1.815 - 1.816 - // Generate the type check. 1.817 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 1.818 - Klass::super_check_offset_offset_in_bytes()); 1.819 - __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 1.820 - assert_clean_int(sco_temp, rax); 1.821 - generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); 1.822 - 1.823 - // Fetch destination element klass from the objArrayKlass header. 1.824 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 1.825 - objArrayKlass::element_klass_offset_in_bytes()); 1.826 - __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); 1.827 - __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 1.828 - assert_clean_int(sco_temp, rax); 1.829 - 1.830 - // the checkcast_copy loop needs two extra arguments: 1.831 - assert(c_rarg3 == sco_temp, "#3 already in place"); 1.832 - __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass 1.833 - __ jump(RuntimeAddress(checkcast_copy_entry)); 1.834 - } 1.835 - 1.836 - __ BIND(L_failed); 1.837 - __ xorptr(rax, rax); 1.838 - __ notptr(rax); // return -1 1.839 - __ leave(); // required for proper stackwalking of RuntimeStub frame 1.840 - __ ret(0); 1.841 - 1.842 - return start; 1.843 - } 1.844 - 1.845 -#undef length_arg 1.846 -#endif 1.847 1.848 //FIXME 1.849 address generate_disjoint_long_copy(bool aligned, const char *name) {