Sun, 09 Oct 2016 14:48:55 +0800
merge
src/cpu/mips/vm/stubGenerator_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/assembler_mips.cpp Tue Sep 20 10:40:42 2016 +0800 1.2 +++ b/src/cpu/mips/vm/assembler_mips.cpp Sun Oct 09 14:48:55 2016 +0800 1.3 @@ -1926,6 +1926,7 @@ 1.4 #endif 1.5 sync(); 1.6 sb(R0, AT, 0); 1.7 + sync(); 1.8 } 1.9 /* 1.10 void MacroAssembler::subptr(Register dst, int32_t imm32) {
2.1 --- a/src/cpu/mips/vm/mips_64.ad Tue Sep 20 10:40:42 2016 +0800 2.2 +++ b/src/cpu/mips/vm/mips_64.ad Sun Oct 09 14:48:55 2016 +0800 2.3 @@ -1947,56 +1947,52 @@ 2.4 2.5 guarantee(scale == 0, "scale is not zero !"); 2.6 2.7 + __ sync(); 2.8 + 2.9 if( index != 0 ) { 2.10 __ daddu(AT, as_Register(base), as_Register(index)); 2.11 if( Assembler::is_simm16(disp) ) { 2.12 if (value == 0) { 2.13 - __ sync(); 2.14 __ sb(R0, AT, disp); 2.15 } else { 2.16 __ move(T9, value); 2.17 - __ sync(); 2.18 __ sb(T9, AT, disp); 2.19 } 2.20 } else { 2.21 if (value == 0) { 2.22 __ move(T9, disp); 2.23 __ daddu(AT, AT, T9); 2.24 - __ sync(); 2.25 __ sb(R0, AT, 0); 2.26 } else { 2.27 __ move(T9, disp); 2.28 __ daddu(AT, AT, T9); 2.29 __ move(T9, value); 2.30 - __ sync(); 2.31 __ sb(T9, AT, 0); 2.32 } 2.33 } 2.34 } else { 2.35 if( Assembler::is_simm16(disp) ) { 2.36 if (value == 0) { 2.37 - __ sync(); 2.38 __ sb(R0, as_Register(base), disp); 2.39 } else { 2.40 __ move(AT, value); 2.41 - __ sync(); 2.42 __ sb(AT, as_Register(base), disp); 2.43 } 2.44 } else { 2.45 if (value == 0) { 2.46 __ move(T9, disp); 2.47 __ daddu(AT, as_Register(base), T9); 2.48 - __ sync(); 2.49 __ sb(R0, AT, 0); 2.50 } else { 2.51 __ move(T9, disp); 2.52 __ daddu(AT, as_Register(base), T9); 2.53 __ move(T9, value); 2.54 - __ sync(); 2.55 __ sb(T9, AT, 0); 2.56 } 2.57 } 2.58 } 2.59 + 2.60 + __ sync(); 2.61 %} 2.62 2.63 // Load Short (16bit signed)
3.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Tue Sep 20 10:40:42 2016 +0800 3.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Sun Oct 09 14:48:55 2016 +0800 3.3 @@ -564,16 +564,6 @@ 3.4 // Generate store check for array 3.5 // 3.6 // Input: 3.7 - // %edi - starting address 3.8 - // %ecx - element count 3.9 - // 3.10 - // The 2 input registers are overwritten 3.11 - // 3.12 - 3.13 - // 3.14 - // Generate store check for array 3.15 - // 3.16 - // Input: 3.17 // T0 - starting address(edi) 3.18 // T1 - element count (ecx) 3.19 // 3.20 @@ -1276,32 +1266,23 @@ 3.21 // used by generate_conjoint_int_oop_copy(). 3.22 // 3.23 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { 3.24 - Label l_2, l_3, l_4, l_5, l_6, l_7, l_stchk; 3.25 + Label l_3, l_4, l_5, l_6, l_7; 3.26 StubCodeMark mark(this, "StubRoutines", name); 3.27 + 3.28 __ align(CodeEntryAlignment); 3.29 address start = __ pc(); 3.30 - __ push(T3); 3.31 - __ push(T0); 3.32 - __ push(T1); 3.33 - __ push(T8); 3.34 - __ move(T1, A2); 3.35 - __ move(T3, A0); 3.36 + __ push(T3); 3.37 + __ push(T0); 3.38 + __ push(T1); 3.39 + __ push(T8); 3.40 + __ move(T1, A2); 3.41 + __ move(T3, A0); 3.42 __ move(T0, A1); 3.43 3.44 - __ b(l_2); 3.45 - __ delayed()->nop(); 3.46 if (is_oop) { 3.47 - __ b(l_stchk); 3.48 - __ delayed()->nop(); 3.49 + __ sync(); 3.50 } 3.51 - __ pop(T8); 3.52 - __ pop(T1); 3.53 - __ pop(T0); 3.54 - __ pop(T3); 3.55 - __ jr(RA); 3.56 - __ delayed()->nop(); 3.57 3.58 - __ bind(l_2); 3.59 if(!aligned) { 3.60 __ xorr(AT, T3, T0); 3.61 __ andi(AT, AT, 7); 3.62 @@ -1318,8 +1299,9 @@ 3.63 __ daddi(T3, T3, 4); 3.64 __ daddi(T0, T0, 4); 3.65 } 3.66 - __ bind(l_6); 3.67 + 3.68 { 3.69 + __ bind(l_6); 3.70 __ daddi(AT, T1, -1); 3.71 __ blez(AT, l_5); 3.72 __ delayed()->nop(); 3.73 @@ -1336,30 +1318,35 @@ 3.74 } 3.75 3.76 __ bind(l_5); 3.77 - __ beq(T1, R0, l_4); 3.78 - __ delayed()->nop(); 3.79 + __ beq(T1, R0, l_4); 3.80 + __ delayed()->nop(); 3.81 + 3.82 __ align(16); 3.83 __ bind(l_3); 3.84 - __ lw(AT, T3, 0); 3.85 - __ sw(AT, T0, 0); 3.86 + __ lw(AT, T3, 0); 3.87 + __ sw(AT, T0, 0); 3.88 __ addi(T3, T3, 4); 3.89 __ addi(T0, T0, 4); 3.90 - __ addi(T1, T1, -1); 3.91 - __ bne(T1, R0, l_3); 3.92 - __ delayed()->nop(); 3.93 + __ addi(T1, T1, -1); 3.94 + __ bne(T1, R0, l_3); 3.95 + __ delayed()->nop(); 3.96 + 3.97 if (is_oop) { 3.98 - __ bind(l_stchk); 3.99 - __ move(T0, A1); 3.100 - __ move(T1, A2); 3.101 + __ move(T0, A1); 3.102 + __ move(T1, A2); 3.103 array_store_check(); 3.104 + __ sync(); 3.105 } 3.106 + 3.107 + // exit 3.108 __ bind(l_4); 3.109 __ pop(T8); 3.110 __ pop(T1); 3.111 __ pop(T0); 3.112 __ pop(T3); 3.113 - __ jr(RA); 3.114 - __ delayed()->nop(); 3.115 + __ jr(RA); 3.116 + __ delayed()->nop(); 3.117 + 3.118 return start; 3.119 } 3.120 3.121 @@ -1379,7 +1366,7 @@ 3.122 // cache line boundaries will still be loaded and stored atomicly. 3.123 // 3.124 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { 3.125 - Label l_2, l_3, l_4, l_stchk; 3.126 + Label l_2, l_4; 3.127 StubCodeMark mark(this, "StubRoutines", name); 3.128 __ align(CodeEntryAlignment); 3.129 address start = __ pc(); 3.130 @@ -1402,84 +1389,51 @@ 3.131 __ push(T1); 3.132 __ push(T8); 3.133 3.134 - /* 3.135 - __ pushl(esi); 3.136 - __ movl(ecx, Address(esp, 4+12)); // count 3.137 - __ pushl(edi); 3.138 - __ movl(esi, Address(esp, 8+ 4)); // from 3.139 - __ movl(edi, Address(esp, 8+ 8)); // to 3.140 - */ 3.141 __ move(T1, A2); 3.142 __ move(T3, A0); 3.143 __ move(T0, A1); 3.144 + // T3: source array address 3.145 + // T0: destination array address 3.146 + // T1: element count 3.147 3.148 - //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 3.149 + if (is_oop) { 3.150 + __ sync(); 3.151 + } 3.152 + 3.153 __ sll(AT, T1, Address::times_4); 3.154 __ add(AT, T3, AT); 3.155 __ lea(T3 , Address(AT, -4)); 3.156 - //__ std(); 3.157 - //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 3.158 __ sll(AT, T1, Address::times_4); 3.159 __ add(AT, T0, AT); 3.160 __ lea(T0 , Address(AT, -4)); 3.161 3.162 - // __ cmpl(ecx, 32); 3.163 - // __ jcc(Assembler::above, l_3); // > 32 dwords 3.164 - // __ testl(ecx, ecx); 3.165 - //__ jcc(Assembler::zero, l_4); 3.166 __ beq(T1, R0, l_4); 3.167 __ delayed()->nop(); 3.168 - // __ subl(edi, esi); 3.169 + 3.170 __ align(16); 3.171 __ bind(l_2); 3.172 - // __ movl(edx, Address(esi)); 3.173 __ lw(AT, T3, 0); 3.174 - // __ movl(Address(esi, edi, Address::times_1), edx); 3.175 __ sw(AT, T0, 0); 3.176 - // __ subl(esi, 4); 3.177 __ addi(T3, T3, -4); 3.178 __ addi(T0, T0, -4); 3.179 - // __ decl(ecx); 3.180 __ addi(T1, T1, -1); 3.181 - //__ jcc(Assembler::notEqual, l_2); 3.182 __ bne(T1, R0, l_2); 3.183 __ delayed()->nop(); 3.184 + 3.185 if (is_oop) { 3.186 - // __ jmp(l_stchk); 3.187 - __ b( l_stchk); 3.188 - __ delayed()->nop(); 3.189 + __ move(T0, A1); 3.190 + __ move(T1, A2); 3.191 + array_store_check(); 3.192 + __ sync(); 3.193 } 3.194 __ bind(l_4); 3.195 - // __ cld(); 3.196 - // __ popl(edi); 3.197 - // __ popl(esi); 3.198 - // __ ret(0); 3.199 __ pop(T8); 3.200 __ pop(T1); 3.201 __ pop(T0); 3.202 __ pop(T3); 3.203 __ jr(RA); 3.204 __ delayed()->nop(); 3.205 - __ bind(l_3); 3.206 - // __ rep_movl(); 3.207 - if (is_oop) { 3.208 - __ bind(l_stchk); 3.209 - // __ movl(edi, Address(esp, 8+ 8)); 3.210 - __ move(T0, A1); 3.211 - // __ movl(ecx, Address(esp, 8+ 12)); 3.212 - __ move(T1, A2); 3.213 - array_store_check(); 3.214 - } 3.215 - // __ cld(); 3.216 - // __ popl(edi); 3.217 - // __ popl(esi); 3.218 - // __ ret(0); 3.219 - __ pop(T8); 3.220 - __ pop(T1); 3.221 - __ pop(T0); 3.222 - __ pop(T3); 3.223 - __ jr(RA); 3.224 - __ delayed()->nop(); 3.225 + 3.226 return start; 3.227 } 3.228 3.229 @@ -1503,70 +1457,49 @@ 3.230 // used by generate_conjoint_int_oop_copy(). 3.231 // 3.232 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 3.233 - Label l_2, l_3, l_4, l_stchk; 3.234 + Label l_3, l_4; 3.235 StubCodeMark mark(this, "StubRoutines", name); 3.236 __ align(CodeEntryAlignment); 3.237 address start = __ pc(); 3.238 + 3.239 __ push(T3); 3.240 __ push(T0); 3.241 __ push(T1); 3.242 __ push(T8); 3.243 + 3.244 __ move(T1, A2); 3.245 __ move(T3, A0); 3.246 __ move(T0, A1); 3.247 + // T3: source array address 3.248 + // T0: destination array address 3.249 + // T1: element count 3.250 3.251 - // __ cmpl(ecx, 32); 3.252 - // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords 3.253 - // __ rep_movl(); 3.254 - __ b(l_2); 3.255 - __ delayed()->nop(); 3.256 if (is_oop) { 3.257 - // __ jmp(l_stchk); 3.258 - __ b(l_stchk); 3.259 - __ delayed()->nop(); 3.260 + __ sync(); 3.261 } 3.262 - // __ popl(edi); 3.263 - // __ popl(esi); 3.264 - // __ ret(0); 3.265 - __ pop(T8); 3.266 - __ pop(T1); 3.267 - __ pop(T0); 3.268 - __ pop(T3); 3.269 - __ jr(RA); 3.270 + 3.271 + __ beq(T1, R0, l_4); 3.272 __ delayed()->nop(); 3.273 3.274 - __ bind(l_2); 3.275 - // __ subl(edi, esi); 3.276 - // __ testl(ecx, ecx); 3.277 - // __ jcc(Assembler::zero, l_4); 3.278 - __ beq(T1, R0, l_4); 3.279 - __ delayed()->nop(); 3.280 __ align(16); 3.281 __ bind(l_3); 3.282 - //__ movl(edx, Address(esi)); 3.283 __ ld(AT, T3, 0); 3.284 - // __ movl(Address(edi, esi, Address::times_1), edx); 3.285 __ sd(AT, T0, 0); 3.286 - // __ addl(esi, 4); 3.287 __ addi(T3, T3, 8); 3.288 __ addi(T0, T0, 8); 3.289 - // __ decl(ecx); 3.290 __ addi(T1, T1, -1); 3.291 - // __ jcc(Assembler::notEqual, l_3); 3.292 __ bne(T1, R0, l_3); 3.293 __ delayed()->nop(); 3.294 + 3.295 if (is_oop) { 3.296 - __ bind(l_stchk); 3.297 - // __ movl(edi, Address(esp, 8+ 8)); 3.298 - // __ movl(ecx, Address(esp, 8+ 12)); 3.299 __ move(T0, A1); 3.300 __ move(T1, A2); 3.301 array_store_check(); 3.302 + __ sync(); 3.303 } 3.304 + 3.305 + // exit 3.306 __ bind(l_4); 3.307 - // __ popl(edi); 3.308 - // __ popl(esi); 3.309 - // __ ret(0); 3.310 __ pop(T8); 3.311 __ pop(T1); 3.312 __ pop(T0); 3.313 @@ -1592,7 +1525,7 @@ 3.314 // cache line boundaries will still be loaded and stored atomicly. 3.315 // 3.316 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 3.317 - Label l_2, l_3, l_4, l_stchk; 3.318 + Label l_2, l_4; 3.319 StubCodeMark mark(this, "StubRoutines", name); 3.320 __ align(CodeEntryAlignment); 3.321 address start = __ pc(); 3.322 @@ -1619,67 +1552,37 @@ 3.323 __ move(T3, A0); 3.324 __ move(T0, A1); 3.325 3.326 - //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4 3.327 + if (is_oop) { 3.328 + __ sync(); 3.329 + } 3.330 + 3.331 __ sll(AT, T1, Address::times_8); 3.332 __ add(AT, T3, AT); 3.333 __ lea(T3 , Address(AT, -8)); 3.334 - //__ std(); 3.335 - //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4 3.336 __ sll(AT, T1, Address::times_8); 3.337 __ add(AT, T0, AT); 3.338 __ lea(T0 , Address(AT, -8)); 3.339 3.340 - // __ cmpl(ecx, 32); 3.341 - // __ jcc(Assembler::above, l_3); // > 32 dwords 3.342 - // __ testl(ecx, ecx); 3.343 - //__ jcc(Assembler::zero, l_4); 3.344 __ beq(T1, R0, l_4); 3.345 __ delayed()->nop(); 3.346 - // __ subl(edi, esi); 3.347 + 3.348 __ align(16); 3.349 __ bind(l_2); 3.350 - // __ movl(edx, Address(esi)); 3.351 __ ld(AT, T3, 0); 3.352 - // __ movl(Address(esi, edi, Address::times_1), edx); 3.353 __ sd(AT, T0, 0); 3.354 - // __ subl(esi, 4); 3.355 __ addi(T3, T3, -8); 3.356 __ addi(T0, T0, -8); 3.357 - // __ decl(ecx); 3.358 __ addi(T1, T1, -1); 3.359 - //__ jcc(Assembler::notEqual, l_2); 3.360 __ bne(T1, R0, l_2); 3.361 __ delayed()->nop(); 3.362 + 3.363 if (is_oop) { 3.364 - // __ jmp(l_stchk); 3.365 - __ b( l_stchk); 3.366 - __ delayed()->nop(); 3.367 + __ move(T0, A1); 3.368 + __ move(T1, A2); 3.369 + array_store_check(); 3.370 + __ sync(); 3.371 } 3.372 __ bind(l_4); 3.373 - // __ cld(); 3.374 - // __ popl(edi); 3.375 - // __ popl(esi); 3.376 - // __ ret(0); 3.377 - __ pop(T8); 3.378 - __ pop(T1); 3.379 - __ pop(T0); 3.380 - __ pop(T3); 3.381 - __ jr(RA); 3.382 - __ delayed()->nop(); 3.383 - __ bind(l_3); 3.384 - // __ rep_movl(); 3.385 - if (is_oop) { 3.386 - __ bind(l_stchk); 3.387 - // __ movl(edi, Address(esp, 8+ 8)); 3.388 - __ move(T0, A1); 3.389 - // __ movl(ecx, Address(esp, 8+ 12)); 3.390 - __ move(T1, A2); 3.391 - array_store_check(); 3.392 - } 3.393 - // __ cld(); 3.394 - // __ popl(edi); 3.395 - // __ popl(esi); 3.396 - // __ ret(0); 3.397 __ pop(T8); 3.398 __ pop(T1); 3.399 __ pop(T0); 3.400 @@ -1688,759 +1591,6 @@ 3.401 __ delayed()->nop(); 3.402 return start; 3.403 } 3.404 -#if 0 3.405 - // Arguments: 3.406 - // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes 3.407 - // ignored 3.408 - // is_oop - true => oop array, so generate store check code 3.409 - // name - stub name string 3.410 - // 3.411 - // Inputs: 3.412 - // c_rarg0 - source array address 3.413 - // c_rarg1 - destination array address 3.414 - // c_rarg2 - element count, treated as ssize_t, can be zero 3.415 - // 3.416 - address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 3.417 - __ align(CodeEntryAlignment); 3.418 - StubCodeMark mark(this, "StubRoutines", name); 3.419 - address start = __ pc(); 3.420 - 3.421 - Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 3.422 - const Register from = rdi; // source array address 3.423 - const Register to = rsi; // destination array address 3.424 - const Register qword_count = rdx; // elements count 3.425 - const Register saved_count = rcx; 3.426 - 3.427 - __ enter(); // required for proper stackwalking of RuntimeStub frame 3.428 - assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 3.429 - 3.430 - address disjoint_copy_entry = NULL; 3.431 - if (is_oop) { 3.432 - assert(!UseCompressedOops, "shouldn't be called for compressed oops"); 3.433 - disjoint_copy_entry = disjoint_oop_copy_entry; 3.434 - oop_copy_entry = __ pc(); 3.435 - array_overlap_test(disjoint_oop_copy_entry, Address::times_8); 3.436 - } else { 3.437 - disjoint_copy_entry = disjoint_long_copy_entry; 3.438 - long_copy_entry = __ pc(); 3.439 - array_overlap_test(disjoint_long_copy_entry, Address::times_8); 3.440 - } 3.441 - BLOCK_COMMENT("Entry:"); 3.442 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 3.443 - 3.444 - array_overlap_test(disjoint_copy_entry, Address::times_8); 3.445 - setup_arg_regs(); // from => rdi, to => rsi, count => rdx 3.446 - // r9 and r10 may be used to save non-volatile registers 3.447 - 3.448 - // 'from', 'to' and 'qword_count' are now valid 3.449 - 3.450 - if (is_oop) { 3.451 - // Save to and count for store barrier 3.452 - __ movptr(saved_count, qword_count); 3.453 - // No registers are destroyed by this call 3.454 - gen_write_ref_array_pre_barrier(to, saved_count); 3.455 - } 3.456 - 3.457 - __ jmp(L_copy_32_bytes); 3.458 - 3.459 - // Copy trailing qwords 3.460 - __ BIND(L_copy_8_bytes); 3.461 - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); 3.462 - __ movq(Address(to, qword_count, Address::times_8, -8), rax); 3.463 - __ decrement(qword_count); 3.464 - __ jcc(Assembler::notZero, L_copy_8_bytes); 3.465 - 3.466 - if (is_oop) { 3.467 - __ jmp(L_exit); 3.468 - } else { 3.469 - inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 3.470 - restore_arg_regs(); 3.471 - __ xorptr(rax, rax); // return 0 3.472 - __ leave(); // required for proper stackwalking of RuntimeStub frame 3.473 - __ ret(0); 3.474 - } 3.475 - 3.476 - // Copy in 32-bytes chunks 3.477 - copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 3.478 - 3.479 - if (is_oop) { 3.480 - __ BIND(L_exit); 3.481 - __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); 3.482 - gen_write_ref_array_post_barrier(to, rcx, rax); 3.483 - inc_counter_np(SharedRuntime::_oop_array_copy_ctr); 3.484 - } else { 3.485 - inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); 3.486 - } 3.487 - restore_arg_regs(); 3.488 - __ xorptr(rax, rax); // return 0 3.489 - __ leave(); // required for proper stackwalking of RuntimeStub frame 3.490 - __ ret(0); 3.491 - 3.492 - return start; 3.493 - } 3.494 - 3.495 - 3.496 - // Helper for generating a dynamic type check. 3.497 - // Smashes no registers. 3.498 - void generate_type_check(Register sub_klass, 3.499 - Register super_check_offset, 3.500 - Register super_klass, 3.501 - Label& L_success) { 3.502 - assert_different_registers(sub_klass, super_check_offset, super_klass); 3.503 - 3.504 - BLOCK_COMMENT("type_check:"); 3.505 - 3.506 - Label L_miss; 3.507 - 3.508 - // a couple of useful fields in sub_klass: 3.509 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 3.510 - Klass::secondary_supers_offset_in_bytes()); 3.511 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 3.512 - Klass::secondary_super_cache_offset_in_bytes()); 3.513 - Address secondary_supers_addr(sub_klass, ss_offset); 3.514 - Address super_cache_addr( sub_klass, sc_offset); 3.515 - 3.516 - // if the pointers are equal, we are done (e.g., String[] elements) 3.517 - __ cmpptr(super_klass, sub_klass); 3.518 - __ jcc(Assembler::equal, L_success); 3.519 - 3.520 - // check the supertype display: 3.521 - Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 3.522 - __ cmpptr(super_klass, super_check_addr); // test the super type 3.523 - __ jcc(Assembler::equal, L_success); 3.524 - 3.525 - // if it was a primary super, we can just fail immediately 3.526 - __ cmpl(super_check_offset, sc_offset); 3.527 - __ jcc(Assembler::notEqual, L_miss); 3.528 - 3.529 - // Now do a linear scan of the secondary super-klass chain. 3.530 - // The repne_scan instruction uses fixed registers, which we must spill. 3.531 - // (We need a couple more temps in any case.) 3.532 - // This code is rarely used, so simplicity is a virtue here. 3.533 - inc_counter_np(SharedRuntime::_partial_subtype_ctr); 3.534 - { 3.535 - __ push(rax); 3.536 - __ push(rcx); 3.537 - __ push(rdi); 3.538 - assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); 3.539 - 3.540 - __ movptr(rdi, secondary_supers_addr); 3.541 - // Load the array length. 3.542 - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 3.543 - // Skip to start of data. 3.544 - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 3.545 - // Scan rcx words at [rdi] for occurance of rax 3.546 - // Set NZ/Z based on last compare 3.547 - __ movptr(rax, super_klass); 3.548 - if (UseCompressedOops) { 3.549 - // Compare against compressed form. Don't need to uncompress because 3.550 - // looks like orig rax is restored in popq below. 3.551 - __ encode_heap_oop(rax); 3.552 - __ repne_scanl(); 3.553 - } else { 3.554 - __ repne_scan(); 3.555 - } 3.556 - 3.557 - // Unspill the temp. registers: 3.558 - __ pop(rdi); 3.559 - __ pop(rcx); 3.560 - __ pop(rax); 3.561 - 3.562 - __ jcc(Assembler::notEqual, L_miss); 3.563 - } 3.564 - 3.565 - // Success. Cache the super we found and proceed in triumph. 3.566 - __ movptr(super_cache_addr, super_klass); // note: rax is dead 3.567 - __ jmp(L_success); 3.568 - 3.569 - // Fall through on failure! 3.570 - __ BIND(L_miss); 3.571 - } 3.572 - 3.573 - // 3.574 - // Generate checkcasting array copy stub 3.575 - // 3.576 - // Input: 3.577 - // c_rarg0 - source array address 3.578 - // c_rarg1 - destination array address 3.579 - // c_rarg2 - element count, treated as ssize_t, can be zero 3.580 - // c_rarg3 - size_t ckoff (super_check_offset) 3.581 - // not Win64 3.582 - // c_rarg4 - oop ckval (super_klass) 3.583 - // Win64 3.584 - // rsp+40 - oop ckval (super_klass) 3.585 - // 3.586 - // Output: 3.587 - // rax == 0 - success 3.588 - // rax == -1^K - failure, where K is partial transfer count 3.589 - // 3.590 - address generate_checkcast_copy(const char *name) { 3.591 - 3.592 - Label L_load_element, L_store_element, L_do_card_marks, L_done; 3.593 - 3.594 - // Input registers (after setup_arg_regs) 3.595 - const Register from = rdi; // source array address 3.596 - const Register to = rsi; // destination array address 3.597 - const Register length = rdx; // elements count 3.598 - const Register ckoff = rcx; // super_check_offset 3.599 - const Register ckval = r8; // super_klass 3.600 - 3.601 - // Registers used as temps (r13, r14 are save-on-entry) 3.602 - const Register end_from = from; // source array end address 3.603 - const Register end_to = r13; // destination array end address 3.604 - const Register count = rdx; // -(count_remaining) 3.605 - const Register r14_length = r14; // saved copy of length 3.606 - // End pointers are inclusive, and if length is not zero they point 3.607 - // to the last unit copied: end_to[0] := end_from[0] 3.608 - 3.609 - const Register rax_oop = rax; // actual oop copied 3.610 - const Register r11_klass = r11; // oop._klass 3.611 - 3.612 - //--------------------------------------------------------------- 3.613 - // Assembler stub will be used for this call to arraycopy 3.614 - // if the two arrays are subtypes of Object[] but the 3.615 - // destination array type is not equal to or a supertype 3.616 - // of the source type. Each element must be separately 3.617 - // checked. 3.618 - 3.619 - __ align(CodeEntryAlignment); 3.620 - StubCodeMark mark(this, "StubRoutines", name); 3.621 - address start = __ pc(); 3.622 - 3.623 - __ enter(); // required for proper stackwalking of RuntimeStub frame 3.624 - 3.625 - checkcast_copy_entry = __ pc(); 3.626 - BLOCK_COMMENT("Entry:"); 3.627 - 3.628 -#ifdef ASSERT 3.629 - // caller guarantees that the arrays really are different 3.630 - // otherwise, we would have to make conjoint checks 3.631 - { Label L; 3.632 - array_overlap_test(L, TIMES_OOP); 3.633 - __ stop("checkcast_copy within a single array"); 3.634 - __ bind(L); 3.635 - } 3.636 -#endif //ASSERT 3.637 - 3.638 - // allocate spill slots for r13, r14 3.639 - enum { 3.640 - saved_r13_offset, 3.641 - saved_r14_offset, 3.642 - saved_rbp_offset, 3.643 - saved_rip_offset, 3.644 - saved_rarg0_offset 3.645 - }; 3.646 - __ subptr(rsp, saved_rbp_offset * wordSize); 3.647 - __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); 3.648 - __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); 3.649 - setup_arg_regs(4); // from => rdi, to => rsi, length => rdx 3.650 - // ckoff => rcx, ckval => r8 3.651 - // r9 and r10 may be used to save non-volatile registers 3.652 -#ifdef _WIN64 3.653 - // last argument (#4) is on stack on Win64 3.654 - const int ckval_offset = saved_rarg0_offset + 4; 3.655 - __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); 3.656 -#endif 3.657 - 3.658 - // check that int operands are properly extended to size_t 3.659 - assert_clean_int(length, rax); 3.660 - assert_clean_int(ckoff, rax); 3.661 - 3.662 -#ifdef ASSERT 3.663 - BLOCK_COMMENT("assert consistent ckoff/ckval"); 3.664 - // The ckoff and ckval must be mutually consistent, 3.665 - // even though caller generates both. 3.666 - { Label L; 3.667 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 3.668 - Klass::super_check_offset_offset_in_bytes()); 3.669 - __ cmpl(ckoff, Address(ckval, sco_offset)); 3.670 - __ jcc(Assembler::equal, L); 3.671 - __ stop("super_check_offset inconsistent"); 3.672 - __ bind(L); 3.673 - } 3.674 -#endif //ASSERT 3.675 - 3.676 - // Loop-invariant addresses. They are exclusive end pointers. 3.677 - Address end_from_addr(from, length, TIMES_OOP, 0); 3.678 - Address end_to_addr(to, length, TIMES_OOP, 0); 3.679 - // Loop-variant addresses. They assume post-incremented count < 0. 3.680 - Address from_element_addr(end_from, count, TIMES_OOP, 0); 3.681 - Address to_element_addr(end_to, count, TIMES_OOP, 0); 3.682 - 3.683 - gen_write_ref_array_pre_barrier(to, count); 3.684 - 3.685 - // Copy from low to high addresses, indexed from the end of each array. 3.686 - __ lea(end_from, end_from_addr); 3.687 - __ lea(end_to, end_to_addr); 3.688 - __ movptr(r14_length, length); // save a copy of the length 3.689 - assert(length == count, ""); // else fix next line: 3.690 - __ negptr(count); // negate and test the length 3.691 - __ jcc(Assembler::notZero, L_load_element); 3.692 - 3.693 - // Empty array: Nothing to do. 3.694 - __ xorptr(rax, rax); // return 0 on (trivial) success 3.695 - __ jmp(L_done); 3.696 - 3.697 - // ======== begin loop ======== 3.698 - // (Loop is rotated; its entry is L_load_element.) 3.699 - // Loop control: 3.700 - // for (count = -count; count != 0; count++) 3.701 - // Base pointers src, dst are biased by 8*(count-1),to last element. 3.702 - __ align(16); 3.703 - 3.704 - __ BIND(L_store_element); 3.705 - __ store_heap_oop(rax_oop, to_element_addr); // store the oop 3.706 - __ sync(); 3.707 - __ increment(count); // increment the count toward zero 3.708 - __ jcc(Assembler::zero, L_do_card_marks); 3.709 - 3.710 - // ======== loop entry is here ======== 3.711 - __ BIND(L_load_element); 3.712 - __ load_heap_oop(rax_oop, from_element_addr); // load the oop 3.713 - __ testptr(rax_oop, rax_oop); 3.714 - __ jcc(Assembler::zero, L_store_element); 3.715 - 3.716 - __ load_klass(r11_klass, rax_oop);// query the object klass 3.717 - generate_type_check(r11_klass, ckoff, ckval, L_store_element); 3.718 - // ======== end loop ======== 3.719 - 3.720 - // It was a real error; we must depend on the caller to finish the job. 3.721 - // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. 3.722 - // Emit GC store barriers for the oops we have copied (r14 + rdx), 3.723 - // and report their number to the caller. 3.724 - assert_different_registers(rax, r14_length, count, to, end_to, rcx); 3.725 - __ lea(end_to, to_element_addr); 3.726 - gen_write_ref_array_post_barrier(to, end_to, rscratch1); 3.727 - __ movptr(rax, r14_length); // original oops 3.728 - __ addptr(rax, count); // K = (original - remaining) oops 3.729 - __ notptr(rax); // report (-1^K) to caller 3.730 - __ jmp(L_done); 3.731 - 3.732 - // Come here on success only. 3.733 - __ BIND(L_do_card_marks); 3.734 - __ addptr(end_to, -wordSize); // make an inclusive end pointer 3.735 - gen_write_ref_array_post_barrier(to, end_to, rscratch1); 3.736 - __ xorptr(rax, rax); // return 0 on success 3.737 - 3.738 - // Common exit point (success or failure). 3.739 - __ BIND(L_done); 3.740 - __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); 3.741 - __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); 3.742 - inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); 3.743 - restore_arg_regs(); 3.744 - __ leave(); // required for proper stackwalking of RuntimeStub frame 3.745 - __ ret(0); 3.746 - 3.747 - return start; 3.748 - } 3.749 - 3.750 - // 3.751 - // Generate 'unsafe' array copy stub 3.752 - // Though just as safe as the other stubs, it takes an unscaled 3.753 - // size_t argument instead of an element count. 3.754 - // 3.755 - // Input: 3.756 - // c_rarg0 - source array address 3.757 - // c_rarg1 - destination array address 3.758 - // c_rarg2 - byte count, treated as ssize_t, can be zero 3.759 - // 3.760 - // Examines the alignment of the operands and dispatches 3.761 - // to a long, int, short, or byte copy loop. 3.762 - // 3.763 - address generate_unsafe_copy(const char *name) { 3.764 - 3.765 - Label L_long_aligned, L_int_aligned, L_short_aligned; 3.766 - 3.767 - // Input registers (before setup_arg_regs) 3.768 - const Register from = c_rarg0; // source array address 3.769 - const Register to = c_rarg1; // destination array address 3.770 - const Register size = c_rarg2; // byte count (size_t) 3.771 - 3.772 - // Register used as a temp 3.773 - const Register bits = rax; // test copy of low bits 3.774 - 3.775 - __ align(CodeEntryAlignment); 3.776 - StubCodeMark mark(this, "StubRoutines", name); 3.777 - address start = __ pc(); 3.778 - 3.779 - __ enter(); // required for proper stackwalking of RuntimeStub frame 3.780 - 3.781 - // bump this on entry, not on exit: 3.782 - inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); 3.783 - 3.784 - __ mov(bits, from); 3.785 - __ orptr(bits, to); 3.786 - __ orptr(bits, size); 3.787 - 3.788 - __ testb(bits, BytesPerLong-1); 3.789 - __ jccb(Assembler::zero, L_long_aligned); 3.790 - 3.791 - __ testb(bits, BytesPerInt-1); 3.792 - __ jccb(Assembler::zero, L_int_aligned); 3.793 - 3.794 - __ testb(bits, BytesPerShort-1); 3.795 - __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); 3.796 - 3.797 - __ BIND(L_short_aligned); 3.798 - __ shrptr(size, LogBytesPerShort); // size => short_count 3.799 - __ jump(RuntimeAddress(short_copy_entry)); 3.800 - 3.801 - __ BIND(L_int_aligned); 3.802 - __ shrptr(size, LogBytesPerInt); // size => int_count 3.803 - __ jump(RuntimeAddress(int_copy_entry)); 3.804 - 3.805 - __ BIND(L_long_aligned); 3.806 - __ shrptr(size, LogBytesPerLong); // size => qword_count 3.807 - __ jump(RuntimeAddress(long_copy_entry)); 3.808 - 3.809 - return start; 3.810 - } 3.811 - 3.812 - // Perform range checks on the proposed arraycopy. 3.813 - // Kills temp, but nothing else. 3.814 - // Also, clean the sign bits of src_pos and dst_pos. 3.815 - void arraycopy_range_checks(Register src, // source array oop (c_rarg0) 3.816 - Register src_pos, // source position (c_rarg1) 3.817 - Register dst, // destination array oo (c_rarg2) 3.818 - Register dst_pos, // destination position (c_rarg3) 3.819 - Register length, 3.820 - Register temp, 3.821 - Label& L_failed) { 3.822 - BLOCK_COMMENT("arraycopy_range_checks:"); 3.823 - 3.824 - // if (src_pos + length > arrayOop(src)->length()) FAIL; 3.825 - __ movl(temp, length); 3.826 - __ addl(temp, src_pos); // src_pos + length 3.827 - __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); 3.828 - __ jcc(Assembler::above, L_failed); 3.829 - 3.830 - // if (dst_pos + length > arrayOop(dst)->length()) FAIL; 3.831 - __ movl(temp, length); 3.832 - __ addl(temp, dst_pos); // dst_pos + length 3.833 - __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); 3.834 - __ jcc(Assembler::above, L_failed); 3.835 - 3.836 - // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. 3.837 - // Move with sign extension can be used since they are positive. 3.838 - __ movslq(src_pos, src_pos); 3.839 - __ movslq(dst_pos, dst_pos); 3.840 - 3.841 - BLOCK_COMMENT("arraycopy_range_checks done"); 3.842 - } 3.843 - 3.844 - // 3.845 - // Generate generic array copy stubs 3.846 - // 3.847 - // Input: 3.848 - // c_rarg0 - src oop 3.849 - // c_rarg1 - src_pos (32-bits) 3.850 - // c_rarg2 - dst oop 3.851 - // c_rarg3 - dst_pos (32-bits) 3.852 - // not Win64 3.853 - // c_rarg4 - element count (32-bits) 3.854 - // Win64 3.855 - // rsp+40 - element count (32-bits) 3.856 - // 3.857 - // Output: 3.858 - // rax == 0 - success 3.859 - // rax == -1^K - failure, where K is partial transfer count 3.860 - // 3.861 - address generate_generic_copy(const char *name) { 3.862 - 3.863 - Label L_failed, L_failed_0, L_objArray; 3.864 - Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; 3.865 - 3.866 - // Input registers 3.867 - const Register src = c_rarg0; // source array oop 3.868 - const Register src_pos = c_rarg1; // source position 3.869 - const Register dst = c_rarg2; // destination array oop 3.870 - const Register dst_pos = c_rarg3; // destination position 3.871 - // elements count is on stack on Win64 3.872 -#ifdef _WIN64 3.873 -#define C_RARG4 Address(rsp, 6 * wordSize) 3.874 -#else 3.875 -#define C_RARG4 c_rarg4 3.876 -#endif 3.877 - 3.878 - { int modulus = CodeEntryAlignment; 3.879 - int target = modulus - 5; // 5 = sizeof jmp(L_failed) 3.880 - int advance = target - (__ offset() % modulus); 3.881 - if (advance < 0) advance += modulus; 3.882 - if (advance > 0) __ nop(advance); 3.883 - } 3.884 - StubCodeMark mark(this, "StubRoutines", name); 3.885 - 3.886 - // Short-hop target to L_failed. Makes for denser prologue code. 3.887 - __ BIND(L_failed_0); 3.888 - __ jmp(L_failed); 3.889 - assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); 3.890 - 3.891 - __ align(CodeEntryAlignment); 3.892 - address start = __ pc(); 3.893 - 3.894 - __ enter(); // required for proper stackwalking of RuntimeStub frame 3.895 - 3.896 - // bump this on entry, not on exit: 3.897 - inc_counter_np(SharedRuntime::_generic_array_copy_ctr); 3.898 - 3.899 - //----------------------------------------------------------------------- 3.900 - // Assembler stub will be used for this call to arraycopy 3.901 - // if the following conditions are met: 3.902 - // 3.903 - // (1) src and dst must not be null. 3.904 - // (2) src_pos must not be negative. 3.905 - // (3) dst_pos must not be negative. 3.906 - // (4) length must not be negative. 3.907 - // (5) src klass and dst klass should be the same and not NULL. 3.908 - // (6) src and dst should be arrays. 3.909 - // (7) src_pos + length must not exceed length of src. 3.910 - // (8) dst_pos + length must not exceed length of dst. 3.911 - // 3.912 - 3.913 - // if (src == NULL) return -1; 3.914 - __ testptr(src, src); // src oop 3.915 - size_t j1off = __ offset(); 3.916 - __ jccb(Assembler::zero, L_failed_0); 3.917 - 3.918 - // if (src_pos < 0) return -1; 3.919 - __ testl(src_pos, src_pos); // src_pos (32-bits) 3.920 - __ jccb(Assembler::negative, L_failed_0); 3.921 - 3.922 - // if (dst == NULL) return -1; 3.923 - __ testptr(dst, dst); // dst oop 3.924 - __ jccb(Assembler::zero, L_failed_0); 3.925 - 3.926 - // if (dst_pos < 0) return -1; 3.927 - __ testl(dst_pos, dst_pos); // dst_pos (32-bits) 3.928 - size_t j4off = __ offset(); 3.929 - __ jccb(Assembler::negative, L_failed_0); 3.930 - 3.931 - // The first four tests are very dense code, 3.932 - // but not quite dense enough to put four 3.933 - // jumps in a 16-byte instruction fetch buffer. 3.934 - // That's good, because some branch predicters 3.935 - // do not like jumps so close together. 3.936 - // Make sure of this. 3.937 - guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); 3.938 - 3.939 - // registers used as temp 3.940 - const Register r11_length = r11; // elements count to copy 3.941 - const Register r10_src_klass = r10; // array klass 3.942 - const Register r9_dst_klass = r9; // dest array klass 3.943 - 3.944 - // if (length < 0) return -1; 3.945 - __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) 3.946 - __ testl(r11_length, r11_length); 3.947 - __ jccb(Assembler::negative, L_failed_0); 3.948 - 3.949 - __ load_klass(r10_src_klass, src); 3.950 -#ifdef ASSERT 3.951 - // assert(src->klass() != NULL); 3.952 - BLOCK_COMMENT("assert klasses not null"); 3.953 - { Label L1, L2; 3.954 - __ testptr(r10_src_klass, r10_src_klass); 3.955 - __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL 3.956 - __ bind(L1); 3.957 - __ stop("broken null klass"); 3.958 - __ bind(L2); 3.959 - __ load_klass(r9_dst_klass, dst); 3.960 - __ cmpq(r9_dst_klass, 0); 3.961 - __ jcc(Assembler::equal, L1); // this would be broken also 3.962 - BLOCK_COMMENT("assert done"); 3.963 - } 3.964 -#endif 3.965 - 3.966 - // Load layout helper (32-bits) 3.967 - // 3.968 - // |array_tag| | header_size | element_type | |log2_element_size| 3.969 - // 32 30 24 16 8 2 0 3.970 - // 3.971 - // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 3.972 - // 3.973 - 3.974 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 3.975 - Klass::layout_helper_offset_in_bytes(); 3.976 - 3.977 - const Register rax_lh = rax; // layout helper 3.978 - 3.979 - __ movl(rax_lh, Address(r10_src_klass, lh_offset)); 3.980 - 3.981 - // Handle objArrays completely differently... 3.982 - jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 3.983 - __ cmpl(rax_lh, objArray_lh); 3.984 - __ jcc(Assembler::equal, L_objArray); 3.985 - 3.986 - // if (src->klass() != dst->klass()) return -1; 3.987 - __ load_klass(r9_dst_klass, dst); 3.988 - __ cmpq(r10_src_klass, r9_dst_klass); 3.989 - __ jcc(Assembler::notEqual, L_failed); 3.990 - 3.991 - // if (!src->is_Array()) return -1; 3.992 - __ cmpl(rax_lh, Klass::_lh_neutral_value); 3.993 - __ jcc(Assembler::greaterEqual, L_failed); 3.994 - 3.995 - // At this point, it is known to be a typeArray (array_tag 0x3). 3.996 -#ifdef ASSERT 3.997 - { Label L; 3.998 - __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); 3.999 - __ jcc(Assembler::greaterEqual, L); 3.1000 - __ stop("must be a primitive array"); 3.1001 - __ bind(L); 3.1002 - } 3.1003 -#endif 3.1004 - 3.1005 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 3.1006 - r10, L_failed); 3.1007 - 3.1008 - // typeArrayKlass 3.1009 - // 3.1010 - // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); 3.1011 - // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); 3.1012 - // 3.1013 - 3.1014 - const Register r10_offset = r10; // array offset 3.1015 - const Register rax_elsize = rax_lh; // element size 3.1016 - 3.1017 - __ movl(r10_offset, rax_lh); 3.1018 - __ shrl(r10_offset, Klass::_lh_header_size_shift); 3.1019 - __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset 3.1020 - __ addptr(src, r10_offset); // src array offset 3.1021 - __ addptr(dst, r10_offset); // dst array offset 3.1022 - BLOCK_COMMENT("choose copy loop based on element size"); 3.1023 - __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize 3.1024 - 3.1025 - // next registers should be set before the jump to corresponding stub 3.1026 - const Register from = c_rarg0; // source array address 3.1027 - const Register to = c_rarg1; // destination array address 3.1028 - const Register count = c_rarg2; // elements count 3.1029 - 3.1030 - // 'from', 'to', 'count' registers should be set in such order 3.1031 - // since they are the same as 'src', 'src_pos', 'dst'. 3.1032 - 3.1033 - __ BIND(L_copy_bytes); 3.1034 - __ cmpl(rax_elsize, 0); 3.1035 - __ jccb(Assembler::notEqual, L_copy_shorts); 3.1036 - __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr 3.1037 - __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr 3.1038 - __ movl2ptr(count, r11_length); // length 3.1039 - __ jump(RuntimeAddress(byte_copy_entry)); 3.1040 - 3.1041 - __ BIND(L_copy_shorts); 3.1042 - __ cmpl(rax_elsize, LogBytesPerShort); 3.1043 - __ jccb(Assembler::notEqual, L_copy_ints); 3.1044 - __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr 3.1045 - __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr 3.1046 - __ movl2ptr(count, r11_length); // length 3.1047 - __ jump(RuntimeAddress(short_copy_entry)); 3.1048 - 3.1049 - __ BIND(L_copy_ints); 3.1050 - __ cmpl(rax_elsize, LogBytesPerInt); 3.1051 - __ jccb(Assembler::notEqual, L_copy_longs); 3.1052 - __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr 3.1053 - __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr 3.1054 - __ movl2ptr(count, r11_length); // length 3.1055 - __ jump(RuntimeAddress(int_copy_entry)); 3.1056 - 3.1057 - __ BIND(L_copy_longs); 3.1058 -#ifdef ASSERT 3.1059 - { Label L; 3.1060 - __ cmpl(rax_elsize, LogBytesPerLong); 3.1061 - __ jcc(Assembler::equal, L); 3.1062 - __ stop("must be long copy, but elsize is wrong"); 3.1063 - __ bind(L); 3.1064 - } 3.1065 -#endif 3.1066 - __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr 3.1067 - __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr 3.1068 - __ movl2ptr(count, r11_length); // length 3.1069 - __ jump(RuntimeAddress(long_copy_entry)); 3.1070 - 3.1071 - // objArrayKlass 3.1072 - __ BIND(L_objArray); 3.1073 - // live at this point: r10_src_klass, src[_pos], dst[_pos] 3.1074 - 3.1075 - Label L_plain_copy, L_checkcast_copy; 3.1076 - // test array classes for subtyping 3.1077 - __ load_klass(r9_dst_klass, dst); 3.1078 - __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality 3.1079 - __ jcc(Assembler::notEqual, L_checkcast_copy); 3.1080 - 3.1081 - // Identically typed arrays can be copied without element-wise checks. 3.1082 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 3.1083 - r10, L_failed); 3.1084 - 3.1085 - __ lea(from, Address(src, src_pos, TIMES_OOP, 3.1086 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr 3.1087 - __ lea(to, Address(dst, dst_pos, TIMES_OOP, 3.1088 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr 3.1089 - __ movl2ptr(count, r11_length); // length 3.1090 - __ BIND(L_plain_copy); 3.1091 - __ jump(RuntimeAddress(oop_copy_entry)); 3.1092 - 3.1093 - __ BIND(L_checkcast_copy); 3.1094 - // live at this point: r10_src_klass, !r11_length 3.1095 - { 3.1096 - // assert(r11_length == C_RARG4); // will reload from here 3.1097 - Register r11_dst_klass = r11; 3.1098 - __ load_klass(r11_dst_klass, dst); 3.1099 - 3.1100 - // Before looking at dst.length, make sure dst is also an objArray. 3.1101 - __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); 3.1102 - __ jcc(Assembler::notEqual, L_failed); 3.1103 - 3.1104 - // It is safe to examine both src.length and dst.length. 3.1105 -#ifndef _WIN64 3.1106 - arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4, 3.1107 - rax, L_failed); 3.1108 -#else 3.1109 - __ movl(r11_length, C_RARG4); // reload 3.1110 - arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 3.1111 - rax, L_failed); 3.1112 - __ load_klass(r11_dst_klass, dst); // reload 3.1113 -#endif 3.1114 - 3.1115 - // Marshal the base address arguments now, freeing registers. 3.1116 - __ lea(from, Address(src, src_pos, TIMES_OOP, 3.1117 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 3.1118 - __ lea(to, Address(dst, dst_pos, TIMES_OOP, 3.1119 - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 3.1120 - __ movl(count, C_RARG4); // length (reloaded) 3.1121 - Register sco_temp = c_rarg3; // this register is free now 3.1122 - assert_different_registers(from, to, count, sco_temp, 3.1123 - r11_dst_klass, r10_src_klass); 3.1124 - assert_clean_int(count, sco_temp); 3.1125 - 3.1126 - // Generate the type check. 3.1127 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 3.1128 - Klass::super_check_offset_offset_in_bytes()); 3.1129 - __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 3.1130 - assert_clean_int(sco_temp, rax); 3.1131 - generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); 3.1132 - 3.1133 - // Fetch destination element klass from the objArrayKlass header. 3.1134 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 3.1135 - objArrayKlass::element_klass_offset_in_bytes()); 3.1136 - __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); 3.1137 - __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 3.1138 - assert_clean_int(sco_temp, rax); 3.1139 - 3.1140 - // the checkcast_copy loop needs two extra arguments: 3.1141 - assert(c_rarg3 == sco_temp, "#3 already in place"); 3.1142 - __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass 3.1143 - __ jump(RuntimeAddress(checkcast_copy_entry)); 3.1144 - } 3.1145 - 3.1146 - __ BIND(L_failed); 3.1147 - __ xorptr(rax, rax); 3.1148 - __ notptr(rax); // return -1 3.1149 - __ leave(); // required for proper stackwalking of RuntimeStub frame 3.1150 - __ ret(0); 3.1151 - 3.1152 - return start; 3.1153 - } 3.1154 - 3.1155 -#undef length_arg 3.1156 -#endif 3.1157 3.1158 //FIXME 3.1159 address generate_disjoint_long_copy(bool aligned, const char *name) {
4.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp Tue Sep 20 10:40:42 2016 +0800 4.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp Sun Oct 09 14:48:55 2016 +0800 4.3 @@ -633,6 +633,11 @@ 4.4 // check array 4.5 __ null_check(array, arrayOopDesc::length_offset_in_bytes()); 4.6 4.7 +#ifdef _LP64 4.8 + // sign extend since tos (index) might contain garbage in upper bits 4.9 + __ sll(index, index, 0); 4.10 +#endif // _LP64 4.11 + 4.12 // check index 4.13 Label ok; 4.14 __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
5.1 --- a/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp Tue Sep 20 10:40:42 2016 +0800 5.2 +++ b/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp Sun Oct 09 14:48:55 2016 +0800 5.3 @@ -167,8 +167,12 @@ 5.4 5.5 static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { 5.6 assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); 5.7 - // pd_conjoint_words is word-atomic in this implementation. 5.8 - pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count); 5.9 + if(UseCompressedOops) { 5.10 + copy_conjoint_atomic<jint>((jint*)from, (jint*)to, count); 5.11 + } else { 5.12 + // pd_conjoint_words is word-atomic in this implementation. 5.13 + pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count); 5.14 + } 5.15 } 5.16 5.17 static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
6.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Tue Sep 20 10:40:42 2016 +0800 6.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Sun Oct 09 14:48:55 2016 +0800 6.3 @@ -85,7 +85,13 @@ 6.4 // Card marking 6.5 void inline_write_ref_field_gc(void* field, oop new_val) { 6.6 jbyte* byte = byte_for(field); 6.7 +#ifdef MIPS64 6.8 + OrderAccess::fence(); 6.9 +#endif 6.10 *byte = youngergen_card; 6.11 +#ifdef MIPS64 6.12 + OrderAccess::fence(); 6.13 +#endif 6.14 } 6.15 6.16 // Adaptive size policy support
7.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Sep 20 10:40:42 2016 +0800 7.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Sun Oct 09 14:48:55 2016 +0800 7.3 @@ -3239,6 +3239,9 @@ 7.4 OrderAccess::fence(); 7.5 #endif 7.6 sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); 7.7 +#ifdef MIPS64 7.8 + OrderAccess::fence(); 7.9 +#endif 7.10 } 7.11 7.12 const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
8.1 --- a/src/share/vm/memory/barrierSet.hpp Tue Sep 20 10:40:42 2016 +0800 8.2 +++ b/src/share/vm/memory/barrierSet.hpp Sun Oct 09 14:48:55 2016 +0800 8.3 @@ -27,6 +27,7 @@ 8.4 8.5 #include "memory/memRegion.hpp" 8.6 #include "oops/oopsHierarchy.hpp" 8.7 +#include "runtime/orderAccess.hpp" 8.8 8.9 // This class provides the interface between a barrier implementation and 8.10 // the rest of the system. 8.11 @@ -95,8 +96,16 @@ 8.12 // Keep this private so as to catch violations at build time. 8.13 virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); }; 8.14 protected: 8.15 - virtual void write_ref_field_pre_work( oop* field, oop new_val) {}; 8.16 - virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {}; 8.17 + virtual void write_ref_field_pre_work( oop* field, oop new_val) { 8.18 +#ifdef MIPS64 8.19 + OrderAccess::fence(); 8.20 +#endif 8.21 + }; 8.22 + virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) { 8.23 +#ifdef MIPS64 8.24 + OrderAccess::fence(); 8.25 +#endif 8.26 + }; 8.27 public: 8.28 8.29 // ...then the post-write version. 8.30 @@ -132,9 +141,17 @@ 8.31 8.32 // Below length is the # array elements being written 8.33 virtual void write_ref_array_pre(oop* dst, int length, 8.34 - bool dest_uninitialized = false) {} 8.35 + bool dest_uninitialized = false) { 8.36 +#ifdef MIPS64 8.37 + OrderAccess::fence(); 8.38 +#endif 8.39 + } 8.40 virtual void write_ref_array_pre(narrowOop* dst, int length, 8.41 - bool dest_uninitialized = false) {} 8.42 + bool dest_uninitialized = false) { 8.43 +#ifdef MIPS64 8.44 + OrderAccess::fence(); 8.45 +#endif 8.46 +} 8.47 // Below count is the # array elements being written, starting 8.48 // at the address "start", which may not necessarily be HeapWord-aligned 8.49 inline void write_ref_array(HeapWord* start, size_t count);
9.1 --- a/src/share/vm/memory/cardTableModRefBS.hpp Tue Sep 20 10:40:42 2016 +0800 9.2 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Sun Oct 09 14:48:55 2016 +0800 9.3 @@ -309,6 +309,9 @@ 9.4 9.5 inline void inline_write_ref_array(MemRegion mr) { 9.6 dirty_MemRegion(mr); 9.7 +#ifdef MIPS64 9.8 + OrderAccess::fence(); 9.9 +#endif 9.10 } 9.11 protected: 9.12 void write_ref_array_work(MemRegion mr) { 9.13 @@ -322,7 +325,11 @@ 9.14 9.15 // *** Card-table-barrier-specific things. 9.16 9.17 - template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {} 9.18 + template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) { 9.19 +#ifdef MIPS64 9.20 + OrderAccess::fence(); 9.21 +#endif 9.22 + } 9.23 9.24 template <class T> inline void inline_write_ref_field(T* field, oop newVal, bool release) { 9.25 jbyte* byte = byte_for((void*)field); 9.26 @@ -332,6 +339,9 @@ 9.27 } else { 9.28 *byte = dirty_card; 9.29 } 9.30 +#ifdef MIPS64 9.31 + OrderAccess::fence(); 9.32 +#endif 9.33 } 9.34 9.35 // These are used by G1, when it uses the card table as a temporary data
10.1 --- a/src/share/vm/memory/cardTableRS.cpp Tue Sep 20 10:40:42 2016 +0800 10.2 +++ b/src/share/vm/memory/cardTableRS.cpp Sun Oct 09 14:48:55 2016 +0800 10.3 @@ -251,6 +251,9 @@ 10.4 // cur_youngergen_and_prev_nonclean_card ==> no change. 10.5 void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { 10.6 jbyte* entry = ct_bs()->byte_for(field); 10.7 +#ifdef MIPS64 10.8 + OrderAccess::fence(); 10.9 +#endif 10.10 do { 10.11 jbyte entry_val = *entry; 10.12 // We put this first because it's probably the most common case. 10.13 @@ -265,7 +268,12 @@ 10.14 jbyte new_val = cur_youngergen_and_prev_nonclean_card; 10.15 jbyte res = Atomic::cmpxchg(new_val, entry, entry_val); 10.16 // Did the CAS succeed? 10.17 - if (res == entry_val) return; 10.18 + if (res == entry_val) { 10.19 + #ifdef MIPS64 10.20 + OrderAccess::fence(); 10.21 + #endif 10.22 + return; 10.23 + } 10.24 // Otherwise, retry, to see the new value. 10.25 continue; 10.26 } else {
11.1 --- a/src/share/vm/memory/cardTableRS.hpp Tue Sep 20 10:40:42 2016 +0800 11.2 +++ b/src/share/vm/memory/cardTableRS.hpp Sun Oct 09 14:48:55 2016 +0800 11.3 @@ -121,7 +121,14 @@ 11.4 11.5 void inline_write_ref_field_gc(void* field, oop new_val) { 11.6 jbyte* byte = _ct_bs->byte_for(field); 11.7 - *byte = youngergen_card; 11.8 +#ifdef MIPS64 11.9 + OrderAccess::fence(); 11.10 +#endif 11.11 + *byte = youngergen_card; 11.12 +#ifdef MIPS64 11.13 + OrderAccess::fence(); 11.14 +#endif 11.15 + 11.16 } 11.17 void write_ref_field_gc_work(void* field, oop new_val) { 11.18 inline_write_ref_field_gc(field, new_val);