[C2] disjoint_byte_copy: fix unaligned LW/SW

Sun, 08 May 2016 22:17:54 -0400

author
Jin
date
Sun, 08 May 2016 22:17:54 -0400
changeset 7
e26ad49b7194
parent 6
fbd9470e188d
child 8
cf5765c81f87

[C2] disjoint_byte_copy: fix unaligned LW/SW

/mnt/j2sdk-image/bin/java \
-XX:+PrintCompilation \
-XX:CompileCommand="compileonly,java/util/jar/Manifest\$FastInputStream.*" \
-jar SPECjvm2008.jar -ikv -coe -ict -bt 1 \
compress

Running the command above triggers 35000 unaligned instructions.

280 CALL_LEAF_NOFP,runtime jbyte_arraycopy

a0: (-122610748, .) fc127a6b pc: ffe9809004 <-- src is an odd address
a1: (-122610748, .) fc128100 pc: ffe980910c <-- dest is an even address
a2: (-122610748, .) 2b pc: ffe9809210
t3: (-122610748, .) fc127a6c pc: ffe98093a4 <-- the stub tries to align src to 4 bytes,
t0: (-122610748, .) fc128101 pc: ffe98094a8 <-- which leaves dest misaligned, so the sw raises an unaligned access
t3: (-122610747, *) fc127a70 pc: ffe98093a4
t0: (-122610747, .) fc128105 pc: ffe98094a8 <-- every later access to dest is unaligned as well
t3: (-122610746, *) fc127a74 pc: ffe98093a4
t0: (-122610746, .) fc128109 pc: ffe98094a8

Effect:
* 0 unaligned instructions.

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun May 08 17:53:51 2016 -0400
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun May 08 22:17:54 2016 -0400
     1.3 @@ -638,6 +638,12 @@
     1.4  	  __ blez(AT, l_4);  
     1.5  	  __ delayed()->nop();	
     1.6  	  if (!aligned) {
     1.7 +	    // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */
     1.8 +	    __ andi(AT, T3, 3); 
     1.9 +	    __ andi(T9, T0, 3); 
    1.10 +	    __ bne(AT, T9, l_5); 
    1.11 +	    __ delayed()->nop();	
    1.12 +	  
    1.13  	    // align source address at dword address boundary
    1.14  	    __ move(T1, 4); 
    1.15  	    __ sub(T1, T1, T3); 
    1.16 @@ -711,6 +717,7 @@
    1.17    //
    1.18    address generate_conjoint_byte_copy(bool aligned, const char *name) {
    1.19  		Label l_1, l_2, l_3, l_4, l_5;
    1.20 +		Label l_unaligned, l_aligned;
    1.21  		StubCodeMark mark(this, "StubRoutines", name);
    1.22  		__ align(CodeEntryAlignment);
    1.23  		address start = __ pc();
    1.24 @@ -725,19 +732,42 @@
    1.25  		__ push(T1);	
    1.26  		__ push(T8);	
    1.27  
    1.28 -
    1.29  		// copy from high to low
    1.30  		__ move(T3, A0); 
    1.31  		__ move(T0, A1);
    1.32  		__ move(T1, A2);  
    1.33 -		__ dadd(AT, T3, T1);  
    1.34 -		__ lea(T3, Address(AT, -4));
    1.35 -		__ dadd(AT, T0, T1);  
    1.36 -		__ lea(T0, Address(AT, -4));
    1.37 +		__ dadd(T3, T3, T1);  
    1.38 +		__ dadd(T0, T0, T1);  
    1.39 +
    1.40 +		// 2016/5/8 Jin: copy starting unalinged bytes
    1.41 +		__ bind(l_unaligned);
    1.42 +		__ beq(T1, R0, l_5); 
    1.43 +		__ delayed()->nop(); 
    1.44 +
    1.45 +		__ andi(AT, T3, 3);    
    1.46 +		__ beq(AT, R0, l_aligned); 
    1.47 +		__ delayed()->nop(); 
    1.48 +		__ lb(AT, T3, -1);   
    1.49 +		__ sb(AT, T0, -1); 
    1.50 +		__ daddi(AT, T1, -1); 
    1.51 +		__ daddi(AT, T3, -1); 
    1.52 +		__ daddi(AT, T0, -1); 
    1.53 +		__ b(l_unaligned); 
    1.54 +		__ delayed()->nop(); 
    1.55 +		
    1.56 +		// now T0, T3 point to 4-byte aligned high-ends
    1.57 +		//     T1 contains byte count that is not copied.
    1.58 +		__ bind(l_aligned);
    1.59 +
    1.60  		__ move(T8, T1); 
    1.61  		__ daddi(AT, T1, -3); 
    1.62  		__ blez(AT, l_3); 
    1.63  		__ delayed()->nop();	
    1.64 +
    1.65 +		__ andi(T8, T8, 3); 
    1.66 +		__ lea(T3, Address(T3, -4));
    1.67 +		__ lea(T0, Address(T0, -4));
    1.68 +
    1.69  		__ dsrl(T1, T1, 2); 
    1.70  		__ align(16);
    1.71  		__ bind(l_1);

mercurial