Fixed changeset 108.

Tue, 10 May 2016 15:08:51 -0400

author
aoqi
date
Tue, 10 May 2016 15:08:51 -0400
changeset 8
cf5765c81f87
parent 7
e26ad49b7194
child 9
81c75db49881

Fixed changeset 108.

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun May 08 22:17:54 2016 -0400
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Tue May 10 15:08:51 2016 -0400
     1.3 @@ -550,6 +550,14 @@
     1.4      __ bgtz(AT, no_overlap_target); 
     1.5      __ delayed()->nop(); 
     1.6  
     1.7 +    // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then goto no_overlap_target 
     1.8 +    Label L;
     1.9 +    __ bgez(A0, L);
    1.10 +    __ delayed()->nop(); 
    1.11 +    __ bgtz(A1, no_overlap_target);
    1.12 +    __ delayed()->nop(); 
    1.13 +    __ bind(L);
    1.14 +
    1.15    }
    1.16  
    1.17    //
    1.18 @@ -638,6 +646,7 @@
    1.19  	  __ blez(AT, l_4);  
    1.20  	  __ delayed()->nop();	
    1.21  	  if (!aligned) {
    1.22 +          //TODO: copy 8 bytes at one time
    1.23  	    // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */
    1.24  	    __ andi(AT, T3, 3); 
    1.25  	    __ andi(T9, T0, 3); 
    1.26 @@ -706,9 +715,9 @@
    1.27    //   name    - stub name string
    1.28    //
    1.29    // Inputs:
    1.30 -  //   c_rarg0   - source array address
    1.31 -  //   c_rarg1   - destination array address
    1.32 -  //   c_rarg2   - element count, treated as ssize_t, can be zero
    1.33 +  //   A0   - source array address
    1.34 +  //   A1   - destination array address
    1.35 +  //   A2   - element count, treated as ssize_t, can be zero
    1.36    //
    1.37    // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
    1.38    // we let the hardware handle it.  The one to eight bytes within words,
    1.39 @@ -716,95 +725,128 @@
    1.40    // and stored atomically.
    1.41    //
    1.42    address generate_conjoint_byte_copy(bool aligned, const char *name) {
    1.43 -		Label l_1, l_2, l_3, l_4, l_5;
    1.44 -		Label l_unaligned, l_aligned;
    1.45 -		StubCodeMark mark(this, "StubRoutines", name);
    1.46 -		__ align(CodeEntryAlignment);
    1.47 -		address start = __ pc();
    1.48 -		address nooverlap_target = aligned ?
    1.49 -		StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
    1.50 -		StubRoutines::jbyte_disjoint_arraycopy();
    1.51 +    __ align(CodeEntryAlignment);
    1.52 +    StubCodeMark mark(this, "StubRoutines", name);
    1.53 +    address start = __ pc();
    1.54  
    1.55 -		array_overlap_test(nooverlap_target, 0);
    1.56 +    Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
    1.57 +    Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
    1.58  
    1.59 -		__ push(T3);	
    1.60 -		__ push(T0);	
    1.61 -		__ push(T1);	
    1.62 -		__ push(T8);	
    1.63 +    address nooverlap_target = aligned ?
    1.64 +	    StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
    1.65 +	    StubRoutines::jbyte_disjoint_arraycopy();
    1.66  
    1.67 -		// copy from high to low
    1.68 -		__ move(T3, A0); 
    1.69 -		__ move(T0, A1);
    1.70 -		__ move(T1, A2);  
    1.71 -		__ dadd(T3, T3, T1);  
    1.72 -		__ dadd(T0, T0, T1);  
    1.73 +    array_overlap_test(nooverlap_target, 0);
    1.74  
    1.75 -		// 2016/5/8 Jin: copy starting unalinged bytes
    1.76 -		__ bind(l_unaligned);
    1.77 -		__ beq(T1, R0, l_5); 
    1.78 -		__ delayed()->nop(); 
    1.79 +    const Register from      = A0;   // source array address
    1.80 +    const Register to        = A1;   // destination array address
    1.81 +    const Register count     = A2;   // elements count
    1.82 +    const Register end_from  = T3;   // source array end address
    1.83 +    const Register end_to    = T0;   // destination array end address
    1.84 +    const Register end_count = T1;   // destination array end address
    1.85  
    1.86 -		__ andi(AT, T3, 3);    
    1.87 -		__ beq(AT, R0, l_aligned); 
    1.88 -		__ delayed()->nop(); 
    1.89 -		__ lb(AT, T3, -1);   
    1.90 -		__ sb(AT, T0, -1); 
    1.91 -		__ daddi(AT, T1, -1); 
    1.92 -		__ daddi(AT, T3, -1); 
    1.93 -		__ daddi(AT, T0, -1); 
    1.94 -		__ b(l_unaligned); 
    1.95 -		__ delayed()->nop(); 
    1.96 -		
    1.97 -		// now T0, T3 point to 4-byte aligned high-ends
    1.98 -		//     T1 contains byte count that is not copied.
    1.99 -		__ bind(l_aligned);
   1.100 +    __ push(end_from);	
   1.101 +    __ push(end_to);	
   1.102 +    __ push(end_count);	
   1.103 +    __ push(T8);	
   1.104  
   1.105 -		__ move(T8, T1); 
   1.106 -		__ daddi(AT, T1, -3); 
   1.107 -		__ blez(AT, l_3); 
   1.108 -		__ delayed()->nop();	
   1.109 +    // copy from high to low
   1.110 +    __ move(end_count, count);  
   1.111 +    __ dadd(end_from, from, end_count);  
   1.112 +    __ dadd(end_to, to, end_count);  
   1.113  
   1.114 -		__ andi(T8, T8, 3); 
   1.115 -		__ lea(T3, Address(T3, -4));
   1.116 -		__ lea(T0, Address(T0, -4));
    1.117 +    // 2016/05/08 aoqi: If end_from and end_to have different alignment, unaligned copy is performed.
   1.118 +    __ andi(AT, end_from, 3); 
   1.119 +    __ andi(T8, end_to, 3); 
   1.120 +    __ bne(AT, T8, l_copy_byte); 
   1.121 +    __ delayed()->nop();	
   1.122  
   1.123 -		__ dsrl(T1, T1, 2); 
   1.124 -		__ align(16);
   1.125 -		__ bind(l_1);
   1.126 -		__ lw(AT, T3, 0);   
   1.127 -		__ sw(AT, T0, 0); 
   1.128 -		__ addi(T3, T3, -4);    
   1.129 -		__ addi(T0, T0, -4);    
   1.130 -		__ addi(T1, T1, -1);  
   1.131 -		__ bne(T1, R0, l_1); 
   1.132 -		__ delayed()->nop(); 
   1.133 -		__ b(l_3);  
   1.134 -		__ delayed()->nop(); 
   1.135 -		// copy dwords aligned or not with repeat move
   1.136 -		__ bind(l_2);
   1.137 -		__ bind(l_3);
   1.138 -		// copy suffix (0-3 bytes)
   1.139 -		__ andi(T8, T8, 3); 
   1.140 -		__ beq(T8, R0, l_5); 
   1.141 -		__ delayed()->nop(); 
   1.142 -		__ addi(T3, T3, 3); 
   1.143 -		__ addi(T0, T0, 3); 
   1.144 -		__ bind(l_4);
   1.145 -		__ lb(AT, T3, 0);  
   1.146 -		__ sb(AT, T0, 0); 
   1.147 -		__ addi(T3, T3, -1);  
   1.148 -		__ addi(T0, T0, -1);  
   1.149 -		__ addi(T8, T8, -1); 
   1.150 -		__ bne(T8, R0, l_4); 
   1.151 -		__ delayed()->nop(); 
   1.152 -		__ bind(l_5);
   1.153 -		__ pop(T8);	
   1.154 -		__ pop(T1);	
   1.155 -		__ pop(T0);	
   1.156 -		__ pop(T3);	
   1.157 -		__ jr(RA); 
   1.158 -		__ delayed()->nop(); 
   1.159 -		return start;
   1.160 +    // First deal with the unaligned data at the top.
   1.161 +    __ bind(l_unaligned);
   1.162 +    __ beq(end_count, R0, l_exit); 
   1.163 +    __ delayed()->nop(); 
   1.164 +
   1.165 +    __ andi(AT, end_from, 3);    
   1.166 +    __ bne(AT, R0, l_from_unaligned); 
   1.167 +    __ delayed()->nop(); 
   1.168 +
   1.169 +    __ andi(AT, end_to, 3);    
   1.170 +    __ beq(AT, R0, l_4_bytes_aligned); 
   1.171 +    __ delayed()->nop(); 
   1.172 +
   1.173 +    __ bind(l_from_unaligned);
   1.174 +    __ lb(AT, end_from, -1);   
   1.175 +    __ sb(AT, end_to, -1); 
   1.176 +    __ daddi(end_from, end_from, -1); 
   1.177 +    __ daddi(end_to, end_to, -1); 
   1.178 +    __ daddi(end_count, end_count, -1); 
   1.179 +    __ b(l_unaligned); 
   1.180 +    __ delayed()->nop(); 
   1.181 +
   1.182 +    // now end_to, end_from point to 4-byte aligned high-ends
   1.183 +    //     end_count contains byte count that is not copied.
   1.184 +    // copy 4 bytes at a time
   1.185 +    __ bind(l_4_bytes_aligned);
   1.186 +
   1.187 +    __ move(T8, end_count); 
   1.188 +    __ daddi(AT, end_count, -3); 
   1.189 +    __ blez(AT, l_copy_suffix); 
   1.190 +    __ delayed()->nop();	
   1.191 +
   1.192 +    //__ andi(T8, T8, 3); 
   1.193 +    __ lea(end_from, Address(end_from, -4));
   1.194 +    __ lea(end_to, Address(end_to, -4));
   1.195 +
   1.196 +    __ dsrl(end_count, end_count, 2); 
   1.197 +    __ align(16);
   1.198 +    __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
   1.199 +    __ lw(AT, end_from, 0);   
   1.200 +    __ sw(AT, end_to, 0); 
   1.201 +    __ addi(end_from, end_from, -4);    
   1.202 +    __ addi(end_to, end_to, -4);    
   1.203 +    __ addi(end_count, end_count, -1);  
   1.204 +    __ bne(end_count, R0, l_copy_4_bytes_loop); 
   1.205 +    __ delayed()->nop(); 
   1.206 +
   1.207 +    __ b(l_copy_suffix);  
   1.208 +    __ delayed()->nop(); 
   1.209 +    // copy dwords aligned or not with repeat move
   1.210 +    // l_copy_suffix
   1.211 +    // copy suffix (0-3 bytes)
   1.212 +    __ bind(l_copy_suffix); 
   1.213 +    __ andi(T8, T8, 3); 
   1.214 +    __ beq(T8, R0, l_exit); 
   1.215 +    __ delayed()->nop(); 
   1.216 +    __ addi(end_from, end_from, 3); 
   1.217 +    __ addi(end_to, end_to, 3); 
   1.218 +    __ bind(l_copy_suffix_loop);
   1.219 +    __ lb(AT, end_from, 0);  
   1.220 +    __ sb(AT, end_to, 0); 
   1.221 +    __ addi(end_from, end_from, -1);  
   1.222 +    __ addi(end_to, end_to, -1);  
   1.223 +    __ addi(T8, T8, -1); 
   1.224 +    __ bne(T8, R0, l_copy_suffix_loop); 
   1.225 +    __ delayed()->nop(); 
   1.226 +
   1.227 +    __ bind(l_copy_byte);
   1.228 +    __ beq(end_count, R0, l_exit); 
   1.229 +    __ delayed()->nop(); 
   1.230 +    __ lb(AT, end_from, -1);   
   1.231 +    __ sb(AT, end_to, -1); 
   1.232 +    __ daddi(end_from, end_from, -1); 
   1.233 +    __ daddi(end_to, end_to, -1); 
   1.234 +    __ daddi(end_count, end_count, -1); 
   1.235 +    __ b(l_copy_byte); 
   1.236 +    __ delayed()->nop(); 
   1.237 +
   1.238 +    __ bind(l_exit);
   1.239 +    __ pop(T8);	
   1.240 +    __ pop(end_count);	
   1.241 +    __ pop(end_to);	
   1.242 +    __ pop(end_from);	
   1.243 +    __ jr(RA); 
   1.244 +    __ delayed()->nop(); 
   1.245 +    return start;
   1.246    }
   1.247  
   1.248    // Arguments:

mercurial