merge

Sun, 09 Oct 2016 14:48:55 +0800

author
aoqi
date
Sun, 09 Oct 2016 14:48:55 +0800
changeset 119
659662477125
parent 118
bf4b1d1988a6
parent 116
09e17e497778
child 120
6cd87342c0b1

merge

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/assembler_mips.cpp	Tue Sep 20 10:40:42 2016 +0800
     1.2 +++ b/src/cpu/mips/vm/assembler_mips.cpp	Sun Oct 09 14:48:55 2016 +0800
     1.3 @@ -1926,6 +1926,7 @@
     1.4  #endif
     1.5    sync();
     1.6    sb(R0, AT, 0);
     1.7 +  sync();
     1.8  }
     1.9  /*
    1.10  void MacroAssembler::subptr(Register dst, int32_t imm32) {
     2.1 --- a/src/cpu/mips/vm/mips_64.ad	Tue Sep 20 10:40:42 2016 +0800
     2.2 +++ b/src/cpu/mips/vm/mips_64.ad	Sun Oct 09 14:48:55 2016 +0800
     2.3 @@ -1947,56 +1947,52 @@
     2.4  
     2.5       guarantee(scale == 0, "scale is not zero !");
     2.6  
     2.7 +     __ sync();
     2.8 +
     2.9       if( index != 0 ) {
    2.10          __ daddu(AT, as_Register(base), as_Register(index));
    2.11          if( Assembler::is_simm16(disp) ) { 
    2.12             if (value == 0) {
    2.13 -              __ sync();
    2.14                __ sb(R0, AT, disp);
    2.15             } else {
    2.16                __ move(T9, value);
    2.17 -              __ sync();
    2.18                __ sb(T9, AT, disp);
    2.19             }
    2.20          } else {
    2.21             if (value == 0) {
    2.22                __ move(T9, disp);
    2.23                __ daddu(AT, AT, T9); 
    2.24 -              __ sync();
    2.25                __ sb(R0, AT, 0);
    2.26             } else {
    2.27                __ move(T9, disp);
    2.28                __ daddu(AT, AT, T9); 
    2.29                __ move(T9, value);
    2.30 -              __ sync();
    2.31                __ sb(T9, AT, 0);
    2.32             }
    2.33          }    
    2.34       } else {
    2.35          if( Assembler::is_simm16(disp) ) { 
    2.36             if (value == 0) {
    2.37 -              __ sync();
    2.38                __ sb(R0, as_Register(base), disp);
    2.39             } else {
    2.40                __ move(AT, value);
    2.41 -              __ sync();
    2.42                __ sb(AT, as_Register(base), disp);
    2.43             }
    2.44          } else {
    2.45             if (value == 0) {
    2.46                __ move(T9, disp);   
    2.47                __ daddu(AT, as_Register(base), T9); 
    2.48 -              __ sync();
    2.49                __ sb(R0, AT, 0);
    2.50             } else {
    2.51                __ move(T9, disp);   
    2.52                __ daddu(AT, as_Register(base), T9); 
    2.53                __ move(T9, value);
    2.54 -              __ sync();
    2.55                __ sb(T9, AT, 0);
    2.56             }
    2.57          }    
    2.58       }
    2.59 +
    2.60 +     __ sync();
    2.61    %}
    2.62  
    2.63    // Load Short (16bit signed)
     3.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Tue Sep 20 10:40:42 2016 +0800
     3.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun Oct 09 14:48:55 2016 +0800
     3.3 @@ -564,16 +564,6 @@
     3.4    //  Generate store check for array
     3.5    //
     3.6    //  Input:
     3.7 -  //     %edi    -  starting address
     3.8 -  //     %ecx    -  element count
     3.9 -  //
    3.10 -  //  The 2 input registers are overwritten
    3.11 -  //
    3.12 - 
    3.13 -  //
    3.14 -  //  Generate store check for array
    3.15 -  //
    3.16 -  //  Input:
    3.17    //     T0    -  starting address(edi)
    3.18    //     T1    -  element count  (ecx)
    3.19    //
    3.20 @@ -1276,32 +1266,23 @@
    3.21    //   used by generate_conjoint_int_oop_copy().
    3.22    //
    3.23    address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
    3.24 -    Label l_2, l_3, l_4, l_5, l_6, l_7, l_stchk;
    3.25 +    Label l_3, l_4, l_5, l_6, l_7;
    3.26      StubCodeMark mark(this, "StubRoutines", name);
    3.27 +
    3.28      __ align(CodeEntryAlignment);
    3.29      address start = __ pc();
    3.30 -    __ push(T3);	
    3.31 -    __ push(T0);	
    3.32 -    __ push(T1);	
    3.33 -    __ push(T8);	
    3.34 -    __ move(T1, A2);  
    3.35 -    __ move(T3, A0); 
    3.36 +    __ push(T3);
    3.37 +    __ push(T0);
    3.38 +    __ push(T1);
    3.39 +    __ push(T8);
    3.40 +    __ move(T1, A2);
    3.41 +    __ move(T3, A0);
    3.42      __ move(T0, A1);
    3.43  
    3.44 -    __ b(l_2); 	
    3.45 -    __ delayed()->nop();	
    3.46      if (is_oop) {
    3.47 -      __ b(l_stchk); 
    3.48 -      __ delayed()->nop(); 
    3.49 +      __ sync();
    3.50      }
    3.51 -    __ pop(T8);	
    3.52 -    __ pop(T1);	
    3.53 -    __ pop(T0);	
    3.54 -    __ pop(T3);	
    3.55 -    __ jr(RA); 
    3.56 -    __ delayed()->nop(); 
    3.57  
    3.58 -    __ bind(l_2);
    3.59      if(!aligned) {
    3.60        __ xorr(AT, T3, T0);
    3.61        __ andi(AT, AT, 7);
    3.62 @@ -1318,8 +1299,9 @@
    3.63        __ daddi(T3, T3, 4);
    3.64        __ daddi(T0, T0, 4);
    3.65      }
    3.66 -    __ bind(l_6);
    3.67 +
    3.68      {
    3.69 +      __ bind(l_6);
    3.70        __ daddi(AT, T1, -1);
    3.71        __ blez(AT, l_5);
    3.72        __ delayed()->nop();
    3.73 @@ -1336,30 +1318,35 @@
    3.74      }
    3.75  
    3.76      __ bind(l_5);
    3.77 -    __ beq(T1, R0, l_4);  
    3.78 -    __ delayed()->nop(); 
    3.79 +    __ beq(T1, R0, l_4);
    3.80 +    __ delayed()->nop();
    3.81 +
    3.82      __ align(16);
    3.83      __ bind(l_3);
    3.84 -    __ lw(AT, T3, 0);   
    3.85 -    __ sw(AT, T0, 0); 
    3.86 +    __ lw(AT, T3, 0);
    3.87 +    __ sw(AT, T0, 0);
    3.88      __ addi(T3, T3, 4);
    3.89      __ addi(T0, T0, 4);
    3.90 -    __ addi(T1, T1, -1); 
    3.91 -    __ bne(T1, R0, l_3); 
    3.92 -    __ delayed()->nop(); 
    3.93 +    __ addi(T1, T1, -1);
    3.94 +    __ bne(T1, R0, l_3);
    3.95 +    __ delayed()->nop();
    3.96 +
    3.97      if (is_oop) {
    3.98 -      __ bind(l_stchk);
    3.99 -      __ move(T0, A1); 
   3.100 -      __ move(T1, A2); 
   3.101 +      __ move(T0, A1);
   3.102 +      __ move(T1, A2);
   3.103        array_store_check();
   3.104 +      __ sync();
   3.105      }
   3.106 +
   3.107 +    // exit
   3.108      __ bind(l_4);
   3.109      __ pop(T8);
   3.110      __ pop(T1);
   3.111      __ pop(T0);
   3.112      __ pop(T3);
   3.113 -    __ jr(RA); 
   3.114 -    __ delayed()->nop(); 
   3.115 +    __ jr(RA);
   3.116 +    __ delayed()->nop();
   3.117 +
   3.118      return start;
   3.119    }
   3.120  
   3.121 @@ -1379,7 +1366,7 @@
   3.122    // cache line boundaries will still be loaded and stored atomically.
   3.123    //
   3.124    address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
   3.125 -		Label l_2, l_3, l_4, l_stchk;
   3.126 +		Label l_2, l_4;
   3.127  		StubCodeMark mark(this, "StubRoutines", name);
   3.128  		__ align(CodeEntryAlignment);
   3.129  		address start = __ pc();
   3.130 @@ -1402,84 +1389,51 @@
   3.131  		__ push(T1);
   3.132  		__ push(T8);
   3.133  
   3.134 -		/*
   3.135 -			 __ pushl(esi);
   3.136 -			 __ movl(ecx, Address(esp, 4+12));      // count
   3.137 -			 __ pushl(edi);
   3.138 -			 __ movl(esi, Address(esp, 8+ 4));      // from
   3.139 -			 __ movl(edi, Address(esp, 8+ 8));      // to
   3.140 -		 */ 
   3.141  		__ move(T1, A2);  
   3.142  		__ move(T3, A0); 
   3.143  		__ move(T0, A1);
   3.144 +                // T3: source array address
   3.145 +                // T0: destination array address
   3.146 +                // T1: element count
   3.147  
   3.148 -		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
   3.149 +		if (is_oop) {
   3.150 +                        __ sync();
   3.151 +		}
   3.152 +
   3.153  		__ sll(AT, T1, Address::times_4); 
   3.154  		__ add(AT, T3, AT); 
   3.155  		__ lea(T3 , Address(AT, -4)); 
   3.156 -		//__ std();
   3.157 -		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
   3.158  		__ sll(AT, T1, Address::times_4); 
   3.159  		__ add(AT, T0, AT); 
   3.160  		__ lea(T0 , Address(AT, -4)); 
   3.161  
   3.162 -		//    __ cmpl(ecx, 32);
   3.163 -		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
   3.164 -		//  __ testl(ecx, ecx);
   3.165 -		//__ jcc(Assembler::zero, l_4);
   3.166  		__ beq(T1, R0, l_4); 
   3.167  		__ delayed()->nop();  
   3.168 -		// __ subl(edi, esi);
   3.169 +
   3.170  		__ align(16);
   3.171  		__ bind(l_2);
   3.172 -		// __ movl(edx, Address(esi));
   3.173  		__ lw(AT, T3, 0);   
   3.174 -		// __ movl(Address(esi, edi, Address::times_1), edx);
   3.175  		__ sw(AT, T0, 0); 
   3.176 -		// __ subl(esi, 4);
   3.177  		__ addi(T3, T3, -4); 
   3.178  		__ addi(T0, T0, -4); 
   3.179 -		//   __ decl(ecx);
   3.180  		__ addi(T1, T1, -1); 
   3.181 -		//__ jcc(Assembler::notEqual, l_2);
   3.182  		__ bne(T1, R0, l_2);  
   3.183  		__ delayed()->nop(); 
   3.184 +
   3.185  		if (is_oop) {
   3.186 -			// __ jmp(l_stchk);
   3.187 -			__ b( l_stchk); 
   3.188 -			__ delayed()->nop(); 
   3.189 +			__ move(T0, A1);  
   3.190 +			__ move(T1, A2);  
   3.191 +			array_store_check();
   3.192 +                        __ sync();
   3.193  		}
   3.194  		__ bind(l_4);
   3.195 -		//      __ cld();
   3.196 -		//     __ popl(edi);
   3.197 -		//    __ popl(esi);
   3.198 -		//   __ ret(0);
   3.199  		__ pop(T8); 
   3.200  		__ pop(T1); 
   3.201  		__ pop(T0); 
   3.202  		__ pop(T3); 
   3.203  		__ jr(RA); 
   3.204  		__ delayed()->nop(); 
   3.205 -		__ bind(l_3);
   3.206 -		//   __ rep_movl();
   3.207 -		if (is_oop) {
   3.208 -			__ bind(l_stchk);
   3.209 -			//  __ movl(edi, Address(esp, 8+ 8));
   3.210 -			__ move(T0, A1);  
   3.211 -			// __ movl(ecx, Address(esp, 8+ 12));
   3.212 -			__ move(T1, A2);  
   3.213 -			array_store_check();
   3.214 -		}
   3.215 -		//    __ cld();
   3.216 -		//   __ popl(edi);
   3.217 -		//   __ popl(esi);
   3.218 -		//  __ ret(0);
   3.219 -		__ pop(T8);	
   3.220 -		__ pop(T1);	
   3.221 -		__ pop(T0);	
   3.222 -		__ pop(T3);	
   3.223 -		__ jr(RA);	
   3.224 -		__ delayed()->nop(); 
   3.225 +
   3.226  		return start;
   3.227    }
   3.228  
   3.229 @@ -1503,70 +1457,49 @@
   3.230    //   used by generate_conjoint_int_oop_copy().
   3.231    //
   3.232    address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
   3.233 -		Label l_2, l_3, l_4, l_stchk;
   3.234 +		Label l_3, l_4;
   3.235  		StubCodeMark mark(this, "StubRoutines", name);
   3.236  		__ align(CodeEntryAlignment);
   3.237  		address start = __ pc();
   3.238 +
   3.239  		__ push(T3);	
   3.240  		__ push(T0);	
   3.241  		__ push(T1);	
   3.242  		__ push(T8);	
   3.243 +
   3.244  		__ move(T1, A2);  
   3.245  		__ move(T3, A0); 
   3.246  		__ move(T0, A1);
   3.247 +                // T3: source array address
   3.248 +                // T0: destination array address
   3.249 +                // T1: element count
   3.250  
   3.251 -		// __ cmpl(ecx, 32);
   3.252 -		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
   3.253 -		// __ rep_movl();
   3.254 -		__ b(l_2); 	
   3.255 -		__ delayed()->nop();	
   3.256  		if (is_oop) {
   3.257 -		//  __ jmp(l_stchk);
   3.258 -			__ b(l_stchk); 
   3.259 -			__ delayed()->nop(); 
   3.260 +                        __ sync();
   3.261  		}
   3.262 -		//    __ popl(edi);
   3.263 -		//   __ popl(esi);
   3.264 -		//  __ ret(0);
   3.265 -		__ pop(T8);	
   3.266 -		__ pop(T1);	
   3.267 -		__ pop(T0);	
   3.268 -		__ pop(T3);	
   3.269 -		__ jr(RA); 
   3.270 +
   3.271 +		__ beq(T1, R0, l_4);  
   3.272  		__ delayed()->nop(); 
   3.273  
   3.274 -		__ bind(l_2);
   3.275 -		//  __ subl(edi, esi);
   3.276 -		//  __ testl(ecx, ecx);
   3.277 -		// __ jcc(Assembler::zero, l_4);
   3.278 -		__ beq(T1, R0, l_4);  
   3.279 -		__ delayed()->nop(); 
   3.280  		__ align(16);
   3.281  		__ bind(l_3);
   3.282 -		//__ movl(edx, Address(esi));
   3.283  		__ ld(AT, T3, 0);   
   3.284 -		// __ movl(Address(edi, esi, Address::times_1), edx);
   3.285  		__ sd(AT, T0, 0); 
   3.286 -		// __ addl(esi, 4);
   3.287  		__ addi(T3, T3, 8);
   3.288  		__ addi(T0, T0, 8);
   3.289 -		//   __ decl(ecx);
   3.290  		__ addi(T1, T1, -1); 
   3.291 -		//    __ jcc(Assembler::notEqual, l_3);
   3.292  		__ bne(T1, R0, l_3); 
   3.293  		__ delayed()->nop(); 
   3.294 +
   3.295  		if (is_oop) {
   3.296 -			__ bind(l_stchk);
   3.297 -			//      __ movl(edi, Address(esp, 8+ 8));
   3.298 -			//     __ movl(ecx, Address(esp, 8+ 12));
   3.299  			__ move(T0, A1); 
   3.300  			__ move(T1, A2); 
   3.301  			array_store_check();
   3.302 +                        __ sync();
   3.303  		}
   3.304 +
   3.305 +                // exit
   3.306  		__ bind(l_4);
   3.307 -		//    __ popl(edi);
   3.308 -		//   __ popl(esi);
   3.309 -		//  __ ret(0);
   3.310  		__ pop(T8);
   3.311  		__ pop(T1);
   3.312  		__ pop(T0);
   3.313 @@ -1592,7 +1525,7 @@
   3.315    // cache line boundaries will still be loaded and stored atomically.
   3.315    //
   3.316    address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
   3.317 -		Label l_2, l_3, l_4, l_stchk;
   3.318 +		Label l_2, l_4;
   3.319  		StubCodeMark mark(this, "StubRoutines", name);
   3.320  		__ align(CodeEntryAlignment);
   3.321  		address start = __ pc();
   3.322 @@ -1619,67 +1552,37 @@
   3.323  		__ move(T3, A0); 
   3.324  		__ move(T0, A1);
   3.325  
   3.326 -		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
   3.327 +		if (is_oop) {
   3.328 +			__ sync();
   3.329 +		}
   3.330 +
   3.331  		__ sll(AT, T1, Address::times_8); 
   3.332  		__ add(AT, T3, AT); 
   3.333  		__ lea(T3 , Address(AT, -8)); 
   3.334 -		//__ std();
   3.335 -		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
   3.336  		__ sll(AT, T1, Address::times_8); 
   3.337  		__ add(AT, T0, AT); 
   3.338  		__ lea(T0 , Address(AT, -8)); 
   3.339  
   3.340 -		//    __ cmpl(ecx, 32);
   3.341 -		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
   3.342 -		//  __ testl(ecx, ecx);
   3.343 -		//__ jcc(Assembler::zero, l_4);
   3.344  		__ beq(T1, R0, l_4); 
   3.345  		__ delayed()->nop();  
   3.346 -		// __ subl(edi, esi);
   3.347 +
   3.348  		__ align(16);
   3.349  		__ bind(l_2);
   3.350 -		// __ movl(edx, Address(esi));
   3.351  		__ ld(AT, T3, 0);   
   3.352 -		// __ movl(Address(esi, edi, Address::times_1), edx);
   3.353  		__ sd(AT, T0, 0); 
   3.354 -		// __ subl(esi, 4);
   3.355  		__ addi(T3, T3, -8); 
   3.356  		__ addi(T0, T0, -8); 
   3.357 -		//   __ decl(ecx);
   3.358  		__ addi(T1, T1, -1); 
   3.359 -		//__ jcc(Assembler::notEqual, l_2);
   3.360  		__ bne(T1, R0, l_2);  
   3.361  		__ delayed()->nop(); 
   3.362 +
   3.363  		if (is_oop) {
   3.364 -			// __ jmp(l_stchk);
   3.365 -			__ b( l_stchk); 
   3.366 -			__ delayed()->nop(); 
   3.367 +			__ move(T0, A1);  
   3.368 +			__ move(T1, A2);  
   3.369 +			array_store_check();
   3.370 +                        __ sync();
   3.371  		}
   3.372  		__ bind(l_4);
   3.373 -		//      __ cld();
   3.374 -		//     __ popl(edi);
   3.375 -		//    __ popl(esi);
   3.376 -		//   __ ret(0);
   3.377 -		__ pop(T8); 
   3.378 -		__ pop(T1); 
   3.379 -		__ pop(T0); 
   3.380 -		__ pop(T3); 
   3.381 -		__ jr(RA); 
   3.382 -		__ delayed()->nop(); 
   3.383 -		__ bind(l_3);
   3.384 -		//   __ rep_movl();
   3.385 -		if (is_oop) {
   3.386 -			__ bind(l_stchk);
   3.387 -			//  __ movl(edi, Address(esp, 8+ 8));
   3.388 -			__ move(T0, A1);  
   3.389 -			// __ movl(ecx, Address(esp, 8+ 12));
   3.390 -			__ move(T1, A2);  
   3.391 -			array_store_check();
   3.392 -		}
   3.393 -		//    __ cld();
   3.394 -		//   __ popl(edi);
   3.395 -		//   __ popl(esi);
   3.396 -		//  __ ret(0);
   3.397  		__ pop(T8);	
   3.398  		__ pop(T1);	
   3.399  		__ pop(T0);	
   3.400 @@ -1688,759 +1591,6 @@
   3.401  		__ delayed()->nop(); 
   3.402  		return start;
   3.403    }
   3.404 -#if 0
   3.405 -  // Arguments:
   3.406 -  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
   3.407 -  //             ignored
   3.408 -  //   is_oop  - true => oop array, so generate store check code
   3.409 -  //   name    - stub name string
   3.410 -  //
   3.411 -  // Inputs:
   3.412 -  //   c_rarg0   - source array address
   3.413 -  //   c_rarg1   - destination array address
   3.414 -  //   c_rarg2   - element count, treated as ssize_t, can be zero
   3.415 -  //
   3.416 -  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
   3.417 -    __ align(CodeEntryAlignment);
   3.418 -    StubCodeMark mark(this, "StubRoutines", name);
   3.419 -    address start = __ pc();
   3.420 -
   3.421 -    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
   3.422 -    const Register from        = rdi;  // source array address
   3.423 -    const Register to          = rsi;  // destination array address
   3.424 -    const Register qword_count = rdx;  // elements count
   3.425 -    const Register saved_count = rcx;
   3.426 -
   3.427 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   3.428 -    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
   3.429 -
   3.430 -    address disjoint_copy_entry = NULL;
   3.431 -    if (is_oop) {
   3.432 -      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
   3.433 -      disjoint_copy_entry = disjoint_oop_copy_entry;
   3.434 -      oop_copy_entry  = __ pc();
   3.435 -      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
   3.436 -    } else {
   3.437 -      disjoint_copy_entry = disjoint_long_copy_entry;
   3.438 -      long_copy_entry = __ pc();
   3.439 -      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
   3.440 -    }
   3.441 -    BLOCK_COMMENT("Entry:");
   3.442 -    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
   3.443 -
   3.444 -    array_overlap_test(disjoint_copy_entry, Address::times_8);
   3.445 -    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
   3.446 -                      // r9 and r10 may be used to save non-volatile registers
   3.447 -
   3.448 -    // 'from', 'to' and 'qword_count' are now valid
   3.449 -
   3.450 -    if (is_oop) {
   3.451 -      // Save to and count for store barrier
   3.452 -      __ movptr(saved_count, qword_count);
   3.453 -      // No registers are destroyed by this call
   3.454 -      gen_write_ref_array_pre_barrier(to, saved_count);
   3.455 -    }
   3.456 -
   3.457 -    __ jmp(L_copy_32_bytes);
   3.458 -
   3.459 -    // Copy trailing qwords
   3.460 -  __ BIND(L_copy_8_bytes);
   3.461 -    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
   3.462 -    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
   3.463 -    __ decrement(qword_count);
   3.464 -    __ jcc(Assembler::notZero, L_copy_8_bytes);
   3.465 -
   3.466 -    if (is_oop) {
   3.467 -      __ jmp(L_exit);
   3.468 -    } else {
   3.469 -      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
   3.470 -      restore_arg_regs();
   3.471 -      __ xorptr(rax, rax); // return 0
   3.472 -      __ leave(); // required for proper stackwalking of RuntimeStub frame
   3.473 -      __ ret(0);
   3.474 -    }
   3.475 -
   3.476 -    // Copy in 32-bytes chunks
   3.477 -    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
   3.478 -
   3.479 -    if (is_oop) {
   3.480 -    __ BIND(L_exit);
   3.481 -      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
   3.482 -      gen_write_ref_array_post_barrier(to, rcx, rax);
   3.483 -      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
   3.484 -    } else {
   3.485 -      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
   3.486 -    }
   3.487 -    restore_arg_regs();
   3.488 -    __ xorptr(rax, rax); // return 0
   3.489 -    __ leave(); // required for proper stackwalking of RuntimeStub frame
   3.490 -    __ ret(0);
   3.491 -
   3.492 -    return start;
   3.493 -  }
   3.494 -
   3.495 -
   3.496 -  // Helper for generating a dynamic type check.
   3.497 -  // Smashes no registers.
   3.498 -  void generate_type_check(Register sub_klass,
   3.499 -                           Register super_check_offset,
   3.500 -                           Register super_klass,
   3.501 -                           Label& L_success) {
   3.502 -    assert_different_registers(sub_klass, super_check_offset, super_klass);
   3.503 -
   3.504 -    BLOCK_COMMENT("type_check:");
   3.505 -
   3.506 -    Label L_miss;
   3.507 -
   3.508 -    // a couple of useful fields in sub_klass:
   3.509 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
   3.510 -                     Klass::secondary_supers_offset_in_bytes());
   3.511 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   3.512 -                     Klass::secondary_super_cache_offset_in_bytes());
   3.513 -    Address secondary_supers_addr(sub_klass, ss_offset);
   3.514 -    Address super_cache_addr(     sub_klass, sc_offset);
   3.515 -
   3.516 -    // if the pointers are equal, we are done (e.g., String[] elements)
   3.517 -    __ cmpptr(super_klass, sub_klass);
   3.518 -    __ jcc(Assembler::equal, L_success);
   3.519 -
   3.520 -    // check the supertype display:
   3.521 -    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
   3.522 -    __ cmpptr(super_klass, super_check_addr); // test the super type
   3.523 -    __ jcc(Assembler::equal, L_success);
   3.524 -
   3.525 -    // if it was a primary super, we can just fail immediately
   3.526 -    __ cmpl(super_check_offset, sc_offset);
   3.527 -    __ jcc(Assembler::notEqual, L_miss);
   3.528 -
   3.529 -    // Now do a linear scan of the secondary super-klass chain.
   3.530 -    // The repne_scan instruction uses fixed registers, which we must spill.
   3.531 -    // (We need a couple more temps in any case.)
   3.532 -    // This code is rarely used, so simplicity is a virtue here.
   3.533 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   3.534 -    {
   3.535 -      __ push(rax);
   3.536 -      __ push(rcx);
   3.537 -      __ push(rdi);
   3.538 -      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
   3.539 -
   3.540 -      __ movptr(rdi, secondary_supers_addr);
   3.541 -      // Load the array length.
   3.542 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   3.543 -      // Skip to start of data.
   3.544 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   3.545 -      // Scan rcx words at [rdi] for occurance of rax
   3.546 -      // Set NZ/Z based on last compare
   3.547 -      __ movptr(rax, super_klass);
   3.548 -      if (UseCompressedOops) {
   3.549 -        // Compare against compressed form.  Don't need to uncompress because
   3.550 -        // looks like orig rax is restored in popq below.
   3.551 -        __ encode_heap_oop(rax);
   3.552 -        __ repne_scanl();
   3.553 -      } else {
   3.554 -        __ repne_scan();
   3.555 -      }
   3.556 -
   3.557 -      // Unspill the temp. registers:
   3.558 -      __ pop(rdi);
   3.559 -      __ pop(rcx);
   3.560 -      __ pop(rax);
   3.561 -
   3.562 -      __ jcc(Assembler::notEqual, L_miss);
   3.563 -    }
   3.564 -
   3.565 -    // Success.  Cache the super we found and proceed in triumph.
   3.566 -    __ movptr(super_cache_addr, super_klass); // note: rax is dead
   3.567 -    __ jmp(L_success);
   3.568 -
   3.569 -    // Fall through on failure!
   3.570 -    __ BIND(L_miss);
   3.571 -  }
   3.572 -
   3.573 -  //
   3.574 -  //  Generate checkcasting array copy stub
   3.575 -  //
   3.576 -  //  Input:
   3.577 -  //    c_rarg0   - source array address
   3.578 -  //    c_rarg1   - destination array address
   3.579 -  //    c_rarg2   - element count, treated as ssize_t, can be zero
   3.580 -  //    c_rarg3   - size_t ckoff (super_check_offset)
   3.581 -  // not Win64
   3.582 -  //    c_rarg4   - oop ckval (super_klass)
   3.583 -  // Win64
   3.584 -  //    rsp+40    - oop ckval (super_klass)
   3.585 -  //
   3.586 -  //  Output:
   3.587 -  //    rax ==  0  -  success
   3.588 -  //    rax == -1^K - failure, where K is partial transfer count
   3.589 -  //
   3.590 -  address generate_checkcast_copy(const char *name) {
   3.591 -
   3.592 -    Label L_load_element, L_store_element, L_do_card_marks, L_done;
   3.593 -
   3.594 -    // Input registers (after setup_arg_regs)
   3.595 -    const Register from        = rdi;   // source array address
   3.596 -    const Register to          = rsi;   // destination array address
   3.597 -    const Register length      = rdx;   // elements count
   3.598 -    const Register ckoff       = rcx;   // super_check_offset
   3.599 -    const Register ckval       = r8;    // super_klass
   3.600 -
   3.601 -    // Registers used as temps (r13, r14 are save-on-entry)
   3.602 -    const Register end_from    = from;  // source array end address
   3.603 -    const Register end_to      = r13;   // destination array end address
   3.604 -    const Register count       = rdx;   // -(count_remaining)
   3.605 -    const Register r14_length  = r14;   // saved copy of length
   3.606 -    // End pointers are inclusive, and if length is not zero they point
   3.607 -    // to the last unit copied:  end_to[0] := end_from[0]
   3.608 -
   3.609 -    const Register rax_oop    = rax;    // actual oop copied
   3.610 -    const Register r11_klass  = r11;    // oop._klass
   3.611 -
   3.612 -    //---------------------------------------------------------------
   3.613 -    // Assembler stub will be used for this call to arraycopy
   3.614 -    // if the two arrays are subtypes of Object[] but the
   3.615 -    // destination array type is not equal to or a supertype
   3.616 -    // of the source type.  Each element must be separately
   3.617 -    // checked.
   3.618 -
   3.619 -    __ align(CodeEntryAlignment);
   3.620 -    StubCodeMark mark(this, "StubRoutines", name);
   3.621 -    address start = __ pc();
   3.622 -
   3.623 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   3.624 -
   3.625 -    checkcast_copy_entry  = __ pc();
   3.626 -    BLOCK_COMMENT("Entry:");
   3.627 -
   3.628 -#ifdef ASSERT
   3.629 -    // caller guarantees that the arrays really are different
   3.630 -    // otherwise, we would have to make conjoint checks
   3.631 -    { Label L;
   3.632 -      array_overlap_test(L, TIMES_OOP);
   3.633 -      __ stop("checkcast_copy within a single array");
   3.634 -      __ bind(L);
   3.635 -    }
   3.636 -#endif //ASSERT
   3.637 -
   3.638 -    // allocate spill slots for r13, r14
   3.639 -    enum {
   3.640 -      saved_r13_offset,
   3.641 -      saved_r14_offset,
   3.642 -      saved_rbp_offset,
   3.643 -      saved_rip_offset,
   3.644 -      saved_rarg0_offset
   3.645 -    };
   3.646 -    __ subptr(rsp, saved_rbp_offset * wordSize);
   3.647 -    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
   3.648 -    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
   3.649 -    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
   3.650 -                       // ckoff => rcx, ckval => r8
   3.651 -                       // r9 and r10 may be used to save non-volatile registers
   3.652 -#ifdef _WIN64
   3.653 -    // last argument (#4) is on stack on Win64
   3.654 -    const int ckval_offset = saved_rarg0_offset + 4;
   3.655 -    __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
   3.656 -#endif
   3.657 -
   3.658 -    // check that int operands are properly extended to size_t
   3.659 -    assert_clean_int(length, rax);
   3.660 -    assert_clean_int(ckoff, rax);
   3.661 -
   3.662 -#ifdef ASSERT
   3.663 -    BLOCK_COMMENT("assert consistent ckoff/ckval");
   3.664 -    // The ckoff and ckval must be mutually consistent,
   3.665 -    // even though caller generates both.
   3.666 -    { Label L;
   3.667 -      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
   3.668 -                        Klass::super_check_offset_offset_in_bytes());
   3.669 -      __ cmpl(ckoff, Address(ckval, sco_offset));
   3.670 -      __ jcc(Assembler::equal, L);
   3.671 -      __ stop("super_check_offset inconsistent");
   3.672 -      __ bind(L);
   3.673 -    }
   3.674 -#endif //ASSERT
   3.675 -
   3.676 -    // Loop-invariant addresses.  They are exclusive end pointers.
   3.677 -    Address end_from_addr(from, length, TIMES_OOP, 0);
   3.678 -    Address   end_to_addr(to,   length, TIMES_OOP, 0);
   3.679 -    // Loop-variant addresses.  They assume post-incremented count < 0.
   3.680 -    Address from_element_addr(end_from, count, TIMES_OOP, 0);
   3.681 -    Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
   3.682 -
   3.683 -    gen_write_ref_array_pre_barrier(to, count);
   3.684 -
   3.685 -    // Copy from low to high addresses, indexed from the end of each array.
   3.686 -    __ lea(end_from, end_from_addr);
   3.687 -    __ lea(end_to,   end_to_addr);
   3.688 -    __ movptr(r14_length, length);        // save a copy of the length
   3.689 -    assert(length == count, "");          // else fix next line:
   3.690 -    __ negptr(count);                     // negate and test the length
   3.691 -    __ jcc(Assembler::notZero, L_load_element);
   3.692 -
   3.693 -    // Empty array:  Nothing to do.
   3.694 -    __ xorptr(rax, rax);                  // return 0 on (trivial) success
   3.695 -    __ jmp(L_done);
   3.696 -
   3.697 -    // ======== begin loop ========
   3.698 -    // (Loop is rotated; its entry is L_load_element.)
   3.699 -    // Loop control:
   3.700 -    //   for (count = -count; count != 0; count++)
   3.701 -    // Base pointers src, dst are biased by 8*(count-1),to last element.
   3.702 -    __ align(16);
   3.703 -
   3.704 -    __ BIND(L_store_element);
   3.705 -    __ store_heap_oop(rax_oop, to_element_addr);  // store the oop
   3.706 -    __ sync();
   3.707 -    __ increment(count);               // increment the count toward zero
   3.708 -    __ jcc(Assembler::zero, L_do_card_marks);
   3.709 -
   3.710 -    // ======== loop entry is here ========
   3.711 -    __ BIND(L_load_element);
   3.712 -    __ load_heap_oop(rax_oop, from_element_addr); // load the oop
   3.713 -    __ testptr(rax_oop, rax_oop);
   3.714 -    __ jcc(Assembler::zero, L_store_element);
   3.715 -
   3.716 -    __ load_klass(r11_klass, rax_oop);// query the object klass
   3.717 -    generate_type_check(r11_klass, ckoff, ckval, L_store_element);
   3.718 -    // ======== end loop ========
   3.719 -
   3.720 -    // It was a real error; we must depend on the caller to finish the job.
   3.721 -    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
   3.722 -    // Emit GC store barriers for the oops we have copied (r14 + rdx),
   3.723 -    // and report their number to the caller.
   3.724 -    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
   3.725 -    __ lea(end_to, to_element_addr);
   3.726 -    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
   3.727 -    __ movptr(rax, r14_length);           // original oops
   3.728 -    __ addptr(rax, count);                // K = (original - remaining) oops
   3.729 -    __ notptr(rax);                       // report (-1^K) to caller
   3.730 -    __ jmp(L_done);
   3.731 -
   3.732 -    // Come here on success only.
   3.733 -    __ BIND(L_do_card_marks);
   3.734 -    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
   3.735 -    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
   3.736 -    __ xorptr(rax, rax);                  // return 0 on success
   3.737 -
   3.738 -    // Common exit point (success or failure).
   3.739 -    __ BIND(L_done);
   3.740 -    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
   3.741 -    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
   3.742 -    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
   3.743 -    restore_arg_regs();
   3.744 -    __ leave(); // required for proper stackwalking of RuntimeStub frame
   3.745 -    __ ret(0);
   3.746 -
   3.747 -    return start;
   3.748 -  }
   3.749 -
   3.750 -  //
   3.751 -  //  Generate 'unsafe' array copy stub
   3.752 -  //  Though just as safe as the other stubs, it takes an unscaled
   3.753 -  //  size_t argument instead of an element count.
   3.754 -  //
   3.755 -  //  Input:
   3.756 -  //    c_rarg0   - source array address
   3.757 -  //    c_rarg1   - destination array address
   3.758 -  //    c_rarg2   - byte count, treated as ssize_t, can be zero
   3.759 -  //
   3.760 -  // Examines the alignment of the operands and dispatches
   3.761 -  // to a long, int, short, or byte copy loop.
   3.762 -  //
   3.763 -  address generate_unsafe_copy(const char *name) {
   3.764 -
   3.765 -    Label L_long_aligned, L_int_aligned, L_short_aligned;
   3.766 -
   3.767 -    // Input registers (before setup_arg_regs)
   3.768 -    const Register from        = c_rarg0;  // source array address
   3.769 -    const Register to          = c_rarg1;  // destination array address
   3.770 -    const Register size        = c_rarg2;  // byte count (size_t)
   3.771 -
   3.772 -    // Register used as a temp
   3.773 -    const Register bits        = rax;      // test copy of low bits
   3.774 -
   3.775 -    __ align(CodeEntryAlignment);
   3.776 -    StubCodeMark mark(this, "StubRoutines", name);
   3.777 -    address start = __ pc();
   3.778 -
   3.779 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   3.780 -
   3.781 -    // bump this on entry, not on exit:
   3.782 -    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
   3.783 -
   3.784 -    __ mov(bits, from);
   3.785 -    __ orptr(bits, to);
   3.786 -    __ orptr(bits, size);
   3.787 -
   3.788 -    __ testb(bits, BytesPerLong-1);
   3.789 -    __ jccb(Assembler::zero, L_long_aligned);
   3.790 -
   3.791 -    __ testb(bits, BytesPerInt-1);
   3.792 -    __ jccb(Assembler::zero, L_int_aligned);
   3.793 -
   3.794 -    __ testb(bits, BytesPerShort-1);
   3.795 -    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
   3.796 -
   3.797 -    __ BIND(L_short_aligned);
   3.798 -    __ shrptr(size, LogBytesPerShort); // size => short_count
   3.799 -    __ jump(RuntimeAddress(short_copy_entry));
   3.800 -
   3.801 -    __ BIND(L_int_aligned);
   3.802 -    __ shrptr(size, LogBytesPerInt); // size => int_count
   3.803 -    __ jump(RuntimeAddress(int_copy_entry));
   3.804 -
   3.805 -    __ BIND(L_long_aligned);
   3.806 -    __ shrptr(size, LogBytesPerLong); // size => qword_count
   3.807 -    __ jump(RuntimeAddress(long_copy_entry));
   3.808 -
   3.809 -    return start;
   3.810 -  }
   3.811 -
   3.812 -  // Perform range checks on the proposed arraycopy.
   3.813 -  // Kills temp, but nothing else.
   3.814 -  // Also, clean the sign bits of src_pos and dst_pos.
   3.815 -  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
   3.816 -                              Register src_pos, // source position (c_rarg1)
   3.817 -                              Register dst,     // destination array oo (c_rarg2)
   3.818 -                              Register dst_pos, // destination position (c_rarg3)
   3.819 -                              Register length,
   3.820 -                              Register temp,
   3.821 -                              Label& L_failed) {
   3.822 -    BLOCK_COMMENT("arraycopy_range_checks:");
   3.823 -
   3.824 -    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
   3.825 -    __ movl(temp, length);
   3.826 -    __ addl(temp, src_pos);             // src_pos + length
   3.827 -    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
   3.828 -    __ jcc(Assembler::above, L_failed);
   3.829 -
   3.830 -    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
   3.831 -    __ movl(temp, length);
   3.832 -    __ addl(temp, dst_pos);             // dst_pos + length
   3.833 -    __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
   3.834 -    __ jcc(Assembler::above, L_failed);
   3.835 -
   3.836 -    // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
   3.837 -    // Move with sign extension can be used since they are positive.
   3.838 -    __ movslq(src_pos, src_pos);
   3.839 -    __ movslq(dst_pos, dst_pos);
   3.840 -
   3.841 -    BLOCK_COMMENT("arraycopy_range_checks done");
   3.842 -  }
   3.843 -
   3.844 -  //
   3.845 -  //  Generate generic array copy stubs
   3.846 -  //
   3.847 -  //  Input:
   3.848 -  //    c_rarg0    -  src oop
   3.849 -  //    c_rarg1    -  src_pos (32-bits)
   3.850 -  //    c_rarg2    -  dst oop
   3.851 -  //    c_rarg3    -  dst_pos (32-bits)
   3.852 -  // not Win64
   3.853 -  //    c_rarg4    -  element count (32-bits)
   3.854 -  // Win64
   3.855 -  //    rsp+40     -  element count (32-bits)
   3.856 -  //
   3.857 -  //  Output:
   3.858 -  //    rax ==  0  -  success
   3.859 -  //    rax == -1^K - failure, where K is partial transfer count
   3.860 -  //
   3.861 -  address generate_generic_copy(const char *name) {
   3.862 -
   3.863 -    Label L_failed, L_failed_0, L_objArray;
   3.864 -    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
   3.865 -
   3.866 -    // Input registers
   3.867 -    const Register src        = c_rarg0;  // source array oop
   3.868 -    const Register src_pos    = c_rarg1;  // source position
   3.869 -    const Register dst        = c_rarg2;  // destination array oop
   3.870 -    const Register dst_pos    = c_rarg3;  // destination position
   3.871 -    // elements count is on stack on Win64
   3.872 -#ifdef _WIN64
   3.873 -#define C_RARG4 Address(rsp, 6 * wordSize)
   3.874 -#else
   3.875 -#define C_RARG4 c_rarg4
   3.876 -#endif
   3.877 -
   3.878 -    { int modulus = CodeEntryAlignment;
   3.879 -      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
   3.880 -      int advance = target - (__ offset() % modulus);
   3.881 -      if (advance < 0)  advance += modulus;
   3.882 -      if (advance > 0)  __ nop(advance);
   3.883 -    }
   3.884 -    StubCodeMark mark(this, "StubRoutines", name);
   3.885 -
   3.886 -    // Short-hop target to L_failed.  Makes for denser prologue code.
   3.887 -    __ BIND(L_failed_0);
   3.888 -    __ jmp(L_failed);
   3.889 -    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
   3.890 -
   3.891 -    __ align(CodeEntryAlignment);
   3.892 -    address start = __ pc();
   3.893 -
   3.894 -    __ enter(); // required for proper stackwalking of RuntimeStub frame
   3.895 -
   3.896 -    // bump this on entry, not on exit:
   3.897 -    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
   3.898 -
   3.899 -    //-----------------------------------------------------------------------
   3.900 -    // Assembler stub will be used for this call to arraycopy
   3.901 -    // if the following conditions are met:
   3.902 -    //
   3.903 -    // (1) src and dst must not be null.
   3.904 -    // (2) src_pos must not be negative.
   3.905 -    // (3) dst_pos must not be negative.
   3.906 -    // (4) length  must not be negative.
   3.907 -    // (5) src klass and dst klass should be the same and not NULL.
   3.908 -    // (6) src and dst should be arrays.
   3.909 -    // (7) src_pos + length must not exceed length of src.
   3.910 -    // (8) dst_pos + length must not exceed length of dst.
   3.911 -    //
   3.912 -
   3.913 -    //  if (src == NULL) return -1;
   3.914 -    __ testptr(src, src);         // src oop
   3.915 -    size_t j1off = __ offset();
   3.916 -    __ jccb(Assembler::zero, L_failed_0);
   3.917 -
   3.918 -    //  if (src_pos < 0) return -1;
   3.919 -    __ testl(src_pos, src_pos); // src_pos (32-bits)
   3.920 -    __ jccb(Assembler::negative, L_failed_0);
   3.921 -
   3.922 -    //  if (dst == NULL) return -1;
   3.923 -    __ testptr(dst, dst);         // dst oop
   3.924 -    __ jccb(Assembler::zero, L_failed_0);
   3.925 -
   3.926 -    //  if (dst_pos < 0) return -1;
   3.927 -    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
   3.928 -    size_t j4off = __ offset();
   3.929 -    __ jccb(Assembler::negative, L_failed_0);
   3.930 -
   3.931 -    // The first four tests are very dense code,
   3.932 -    // but not quite dense enough to put four
   3.933 -    // jumps in a 16-byte instruction fetch buffer.
   3.934 -    // That's good, because some branch predicters
   3.935 -    // do not like jumps so close together.
   3.936 -    // Make sure of this.
   3.937 -    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
   3.938 -
   3.939 -    // registers used as temp
   3.940 -    const Register r11_length    = r11; // elements count to copy
   3.941 -    const Register r10_src_klass = r10; // array klass
   3.942 -    const Register r9_dst_klass  = r9;  // dest array klass
   3.943 -
   3.944 -    //  if (length < 0) return -1;
   3.945 -    __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
   3.946 -    __ testl(r11_length, r11_length);
   3.947 -    __ jccb(Assembler::negative, L_failed_0);
   3.948 -
   3.949 -    __ load_klass(r10_src_klass, src);
   3.950 -#ifdef ASSERT
   3.951 -    //  assert(src->klass() != NULL);
   3.952 -    BLOCK_COMMENT("assert klasses not null");
   3.953 -    { Label L1, L2;
   3.954 -      __ testptr(r10_src_klass, r10_src_klass);
   3.955 -      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
   3.956 -      __ bind(L1);
   3.957 -      __ stop("broken null klass");
   3.958 -      __ bind(L2);
   3.959 -      __ load_klass(r9_dst_klass, dst);
   3.960 -      __ cmpq(r9_dst_klass, 0);
   3.961 -      __ jcc(Assembler::equal, L1);     // this would be broken also
   3.962 -      BLOCK_COMMENT("assert done");
   3.963 -    }
   3.964 -#endif
   3.965 -
   3.966 -    // Load layout helper (32-bits)
   3.967 -    //
   3.968 -    //  |array_tag|     | header_size | element_type |     |log2_element_size|
   3.969 -    // 32        30    24            16              8     2                 0
   3.970 -    //
   3.971 -    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
   3.972 -    //
   3.973 -
   3.974 -    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
   3.975 -                    Klass::layout_helper_offset_in_bytes();
   3.976 -
   3.977 -    const Register rax_lh = rax;  // layout helper
   3.978 -
   3.979 -    __ movl(rax_lh, Address(r10_src_klass, lh_offset));
   3.980 -
   3.981 -    // Handle objArrays completely differently...
   3.982 -    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
   3.983 -    __ cmpl(rax_lh, objArray_lh);
   3.984 -    __ jcc(Assembler::equal, L_objArray);
   3.985 -
   3.986 -    //  if (src->klass() != dst->klass()) return -1;
   3.987 -    __ load_klass(r9_dst_klass, dst);
   3.988 -    __ cmpq(r10_src_klass, r9_dst_klass);
   3.989 -    __ jcc(Assembler::notEqual, L_failed);
   3.990 -
   3.991 -    //  if (!src->is_Array()) return -1;
   3.992 -    __ cmpl(rax_lh, Klass::_lh_neutral_value);
   3.993 -    __ jcc(Assembler::greaterEqual, L_failed);
   3.994 -
   3.995 -    // At this point, it is known to be a typeArray (array_tag 0x3).
   3.996 -#ifdef ASSERT
   3.997 -    { Label L;
   3.998 -      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
   3.999 -      __ jcc(Assembler::greaterEqual, L);
  3.1000 -      __ stop("must be a primitive array");
  3.1001 -      __ bind(L);
  3.1002 -    }
  3.1003 -#endif
  3.1004 -
  3.1005 -    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  3.1006 -                           r10, L_failed);
  3.1007 -
  3.1008 -    // typeArrayKlass
  3.1009 -    //
  3.1010 -    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
  3.1011 -    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
  3.1012 -    //
  3.1013 -
  3.1014 -    const Register r10_offset = r10;    // array offset
  3.1015 -    const Register rax_elsize = rax_lh; // element size
  3.1016 -
  3.1017 -    __ movl(r10_offset, rax_lh);
  3.1018 -    __ shrl(r10_offset, Klass::_lh_header_size_shift);
  3.1019 -    __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
  3.1020 -    __ addptr(src, r10_offset);           // src array offset
  3.1021 -    __ addptr(dst, r10_offset);           // dst array offset
  3.1022 -    BLOCK_COMMENT("choose copy loop based on element size");
  3.1023 -    __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
  3.1024 -
  3.1025 -    // next registers should be set before the jump to corresponding stub
  3.1026 -    const Register from     = c_rarg0;  // source array address
  3.1027 -    const Register to       = c_rarg1;  // destination array address
  3.1028 -    const Register count    = c_rarg2;  // elements count
  3.1029 -
  3.1030 -    // 'from', 'to', 'count' registers should be set in such order
  3.1031 -    // since they are the same as 'src', 'src_pos', 'dst'.
  3.1032 -
  3.1033 -  __ BIND(L_copy_bytes);
  3.1034 -    __ cmpl(rax_elsize, 0);
  3.1035 -    __ jccb(Assembler::notEqual, L_copy_shorts);
  3.1036 -    __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
  3.1037 -    __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
  3.1038 -    __ movl2ptr(count, r11_length); // length
  3.1039 -    __ jump(RuntimeAddress(byte_copy_entry));
  3.1040 -
  3.1041 -  __ BIND(L_copy_shorts);
  3.1042 -    __ cmpl(rax_elsize, LogBytesPerShort);
  3.1043 -    __ jccb(Assembler::notEqual, L_copy_ints);
  3.1044 -    __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
  3.1045 -    __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
  3.1046 -    __ movl2ptr(count, r11_length); // length
  3.1047 -    __ jump(RuntimeAddress(short_copy_entry));
  3.1048 -
  3.1049 -  __ BIND(L_copy_ints);
  3.1050 -    __ cmpl(rax_elsize, LogBytesPerInt);
  3.1051 -    __ jccb(Assembler::notEqual, L_copy_longs);
  3.1052 -    __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
  3.1053 -    __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
  3.1054 -    __ movl2ptr(count, r11_length); // length
  3.1055 -    __ jump(RuntimeAddress(int_copy_entry));
  3.1056 -
  3.1057 -  __ BIND(L_copy_longs);
  3.1058 -#ifdef ASSERT
  3.1059 -    { Label L;
  3.1060 -      __ cmpl(rax_elsize, LogBytesPerLong);
  3.1061 -      __ jcc(Assembler::equal, L);
  3.1062 -      __ stop("must be long copy, but elsize is wrong");
  3.1063 -      __ bind(L);
  3.1064 -    }
  3.1065 -#endif
  3.1066 -    __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
  3.1067 -    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
  3.1068 -    __ movl2ptr(count, r11_length); // length
  3.1069 -    __ jump(RuntimeAddress(long_copy_entry));
  3.1070 -
  3.1071 -    // objArrayKlass
  3.1072 -  __ BIND(L_objArray);
  3.1073 -    // live at this point:  r10_src_klass, src[_pos], dst[_pos]
  3.1074 -
  3.1075 -    Label L_plain_copy, L_checkcast_copy;
  3.1076 -    //  test array classes for subtyping
  3.1077 -    __ load_klass(r9_dst_klass, dst);
  3.1078 -    __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
  3.1079 -    __ jcc(Assembler::notEqual, L_checkcast_copy);
  3.1080 -
  3.1081 -    // Identically typed arrays can be copied without element-wise checks.
  3.1082 -    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  3.1083 -                           r10, L_failed);
  3.1084 -
  3.1085 -    __ lea(from, Address(src, src_pos, TIMES_OOP,
  3.1086 -                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
  3.1087 -    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  3.1088 -                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
  3.1089 -    __ movl2ptr(count, r11_length); // length
  3.1090 -  __ BIND(L_plain_copy);
  3.1091 -    __ jump(RuntimeAddress(oop_copy_entry));
  3.1092 -
  3.1093 -  __ BIND(L_checkcast_copy);
  3.1094 -    // live at this point:  r10_src_klass, !r11_length
  3.1095 -    {
  3.1096 -      // assert(r11_length == C_RARG4); // will reload from here
  3.1097 -      Register r11_dst_klass = r11;
  3.1098 -      __ load_klass(r11_dst_klass, dst);
  3.1099 -
  3.1100 -      // Before looking at dst.length, make sure dst is also an objArray.
  3.1101 -      __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
  3.1102 -      __ jcc(Assembler::notEqual, L_failed);
  3.1103 -
  3.1104 -      // It is safe to examine both src.length and dst.length.
  3.1105 -#ifndef _WIN64
  3.1106 -      arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
  3.1107 -                             rax, L_failed);
  3.1108 -#else
  3.1109 -      __ movl(r11_length, C_RARG4);     // reload
  3.1110 -      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
  3.1111 -                             rax, L_failed);
  3.1112 -      __ load_klass(r11_dst_klass, dst); // reload
  3.1113 -#endif
  3.1114 -
  3.1115 -      // Marshal the base address arguments now, freeing registers.
  3.1116 -      __ lea(from, Address(src, src_pos, TIMES_OOP,
  3.1117 -                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  3.1118 -      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
  3.1119 -                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
  3.1120 -      __ movl(count, C_RARG4);          // length (reloaded)
  3.1121 -      Register sco_temp = c_rarg3;      // this register is free now
  3.1122 -      assert_different_registers(from, to, count, sco_temp,
  3.1123 -                                 r11_dst_klass, r10_src_klass);
  3.1124 -      assert_clean_int(count, sco_temp);
  3.1125 -
  3.1126 -      // Generate the type check.
  3.1127 -      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  3.1128 -                        Klass::super_check_offset_offset_in_bytes());
  3.1129 -      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
  3.1130 -      assert_clean_int(sco_temp, rax);
  3.1131 -      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
  3.1132 -
  3.1133 -      // Fetch destination element klass from the objArrayKlass header.
  3.1134 -      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
  3.1135 -                       objArrayKlass::element_klass_offset_in_bytes());
  3.1136 -      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
  3.1137 -      __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
  3.1138 -      assert_clean_int(sco_temp, rax);
  3.1139 -
  3.1140 -      // the checkcast_copy loop needs two extra arguments:
  3.1141 -      assert(c_rarg3 == sco_temp, "#3 already in place");
  3.1142 -      __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
  3.1143 -      __ jump(RuntimeAddress(checkcast_copy_entry));
  3.1144 -    }
  3.1145 -
  3.1146 -  __ BIND(L_failed);
  3.1147 -    __ xorptr(rax, rax);
  3.1148 -    __ notptr(rax); // return -1
  3.1149 -    __ leave();   // required for proper stackwalking of RuntimeStub frame
  3.1150 -    __ ret(0);
  3.1151 -
  3.1152 -    return start;
  3.1153 -  }
  3.1154 -
  3.1155 -#undef length_arg
  3.1156 -#endif
  3.1157  
  3.1158  //FIXME
  3.1159    address generate_disjoint_long_copy(bool aligned, const char *name) {
     4.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp	Tue Sep 20 10:40:42 2016 +0800
     4.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp	Sun Oct 09 14:48:55 2016 +0800
     4.3 @@ -633,6 +633,11 @@
     4.4    // check array
     4.5    __ null_check(array, arrayOopDesc::length_offset_in_bytes());
     4.6  
     4.7 +#ifdef _LP64
     4.8 +  // sign extend since tos (index) might contain garbage in upper bits
     4.9 +  __ sll(index, index, 0);
    4.10 +#endif // _LP64
    4.11 +
    4.12    // check index
    4.13    Label ok;
    4.14    __ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
     5.1 --- a/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp	Tue Sep 20 10:40:42 2016 +0800
     5.2 +++ b/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp	Sun Oct 09 14:48:55 2016 +0800
     5.3 @@ -167,8 +167,12 @@
     5.4  
     5.5  static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
     5.6    assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
     5.7 -  // pd_conjoint_words is word-atomic in this implementation.
     5.8 -  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
     5.9 +  if(UseCompressedOops) {
    5.10 +     copy_conjoint_atomic<jint>((jint*)from, (jint*)to, count);
    5.11 +  } else {
    5.12 +     // pd_conjoint_words is word-atomic in this implementation.
    5.13 +     pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
    5.14 +  }
    5.15  }
    5.16  
    5.17  static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
     6.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Tue Sep 20 10:40:42 2016 +0800
     6.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Sun Oct 09 14:48:55 2016 +0800
     6.3 @@ -85,7 +85,13 @@
     6.4    // Card marking
     6.5    void inline_write_ref_field_gc(void* field, oop new_val) {
     6.6      jbyte* byte = byte_for(field);
     6.7 +#ifdef MIPS64
     6.8 +      OrderAccess::fence();
     6.9 +#endif
    6.10      *byte = youngergen_card;
    6.11 +#ifdef MIPS64
    6.12 +      OrderAccess::fence();
    6.13 +#endif
    6.14    }
    6.15  
    6.16    // Adaptive size policy support
     7.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Tue Sep 20 10:40:42 2016 +0800
     7.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sun Oct 09 14:48:55 2016 +0800
     7.3 @@ -3239,6 +3239,9 @@
     7.4        OrderAccess::fence();
     7.5  #endif
     7.6        sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
     7.7 +#ifdef MIPS64
     7.8 +      OrderAccess::fence();
     7.9 +#endif
    7.10      }
    7.11  
    7.12      const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
     8.1 --- a/src/share/vm/memory/barrierSet.hpp	Tue Sep 20 10:40:42 2016 +0800
     8.2 +++ b/src/share/vm/memory/barrierSet.hpp	Sun Oct 09 14:48:55 2016 +0800
     8.3 @@ -27,6 +27,7 @@
     8.4  
     8.5  #include "memory/memRegion.hpp"
     8.6  #include "oops/oopsHierarchy.hpp"
     8.7 +#include "runtime/orderAccess.hpp"
     8.8  
     8.9  // This class provides the interface between a barrier implementation and
    8.10  // the rest of the system.
    8.11 @@ -95,8 +96,16 @@
    8.12    // Keep this private so as to catch violations at build time.
    8.13    virtual void write_ref_field_pre_work(     void* field, oop new_val) { guarantee(false, "Not needed"); };
    8.14  protected:
    8.15 -  virtual void write_ref_field_pre_work(      oop* field, oop new_val) {};
    8.16 -  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
    8.17 +  virtual void write_ref_field_pre_work(      oop* field, oop new_val) {
    8.18 +#ifdef MIPS64
    8.19 +      OrderAccess::fence();
    8.20 +#endif
    8.21 +  };
    8.22 +  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {
    8.23 +#ifdef MIPS64
    8.24 +      OrderAccess::fence();
    8.25 +#endif
    8.26 +  };
    8.27  public:
    8.28  
    8.29    // ...then the post-write version.
    8.30 @@ -132,9 +141,17 @@
    8.31  
    8.32    // Below length is the # array elements being written
    8.33    virtual void write_ref_array_pre(oop* dst, int length,
    8.34 -                                   bool dest_uninitialized = false) {}
    8.35 +                                   bool dest_uninitialized = false) {
    8.36 +#ifdef MIPS64
    8.37 +      OrderAccess::fence();
    8.38 +#endif
    8.39 +  }
    8.40    virtual void write_ref_array_pre(narrowOop* dst, int length,
    8.41 -                                   bool dest_uninitialized = false) {}
    8.42 +                                   bool dest_uninitialized = false) {
    8.43 +#ifdef MIPS64
    8.44 +      OrderAccess::fence();
    8.45 +#endif
    8.46 +}
    8.47    // Below count is the # array elements being written, starting
    8.48    // at the address "start", which may not necessarily be HeapWord-aligned
    8.49    inline void write_ref_array(HeapWord* start, size_t count);
     9.1 --- a/src/share/vm/memory/cardTableModRefBS.hpp	Tue Sep 20 10:40:42 2016 +0800
     9.2 +++ b/src/share/vm/memory/cardTableModRefBS.hpp	Sun Oct 09 14:48:55 2016 +0800
     9.3 @@ -309,6 +309,9 @@
     9.4  
     9.5    inline void inline_write_ref_array(MemRegion mr) {
     9.6      dirty_MemRegion(mr);
     9.7 +#ifdef MIPS64
     9.8 +    OrderAccess::fence();
     9.9 +#endif
    9.10    }
    9.11  protected:
    9.12    void write_ref_array_work(MemRegion mr) {
    9.13 @@ -322,7 +325,11 @@
    9.14  
    9.15    // *** Card-table-barrier-specific things.
    9.16  
    9.17 -  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}
    9.18 +  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {
    9.19 +#ifdef MIPS64
    9.20 +    OrderAccess::fence();
    9.21 +#endif
    9.22 +  }
    9.23  
    9.24    template <class T> inline void inline_write_ref_field(T* field, oop newVal, bool release) {
    9.25      jbyte* byte = byte_for((void*)field);
    9.26 @@ -332,6 +339,9 @@
    9.27      } else {
    9.28        *byte = dirty_card;
    9.29      }
    9.30 +#ifdef MIPS64
    9.31 +    OrderAccess::fence();
    9.32 +#endif
    9.33    }
    9.34  
    9.35    // These are used by G1, when it uses the card table as a temporary data
    10.1 --- a/src/share/vm/memory/cardTableRS.cpp	Tue Sep 20 10:40:42 2016 +0800
    10.2 +++ b/src/share/vm/memory/cardTableRS.cpp	Sun Oct 09 14:48:55 2016 +0800
    10.3 @@ -251,6 +251,9 @@
    10.4  // cur_youngergen_and_prev_nonclean_card ==> no change.
    10.5  void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) {
    10.6    jbyte* entry = ct_bs()->byte_for(field);
    10.7 +#ifdef MIPS64
    10.8 +  OrderAccess::fence();
    10.9 +#endif
   10.10    do {
   10.11      jbyte entry_val = *entry;
   10.12      // We put this first because it's probably the most common case.
   10.13 @@ -265,7 +268,12 @@
   10.14        jbyte new_val = cur_youngergen_and_prev_nonclean_card;
   10.15        jbyte res = Atomic::cmpxchg(new_val, entry, entry_val);
   10.16        // Did the CAS succeed?
   10.17 -      if (res == entry_val) return;
   10.18 +      if (res == entry_val) {
   10.19 +      #ifdef MIPS64
   10.20 +         OrderAccess::fence();
   10.21 +      #endif
   10.22 +         return;
   10.23 +      }
   10.24        // Otherwise, retry, to see the new value.
   10.25        continue;
   10.26      } else {
    11.1 --- a/src/share/vm/memory/cardTableRS.hpp	Tue Sep 20 10:40:42 2016 +0800
    11.2 +++ b/src/share/vm/memory/cardTableRS.hpp	Sun Oct 09 14:48:55 2016 +0800
    11.3 @@ -121,7 +121,14 @@
    11.4  
    11.5    void inline_write_ref_field_gc(void* field, oop new_val) {
    11.6      jbyte* byte = _ct_bs->byte_for(field);
    11.7 -    *byte = youngergen_card;
    11.8 +#ifdef MIPS64
    11.9 +    OrderAccess::fence();
   11.10 +#endif
   11.11 +   *byte = youngergen_card;
   11.12 +#ifdef MIPS64
   11.13 +   OrderAccess::fence();
   11.14 +#endif
   11.15 +
   11.16    }
   11.17    void write_ref_field_gc_work(void* field, oop new_val) {
   11.18      inline_write_ref_field_gc(field, new_val);

mercurial